Converting .csv/txt file into .mtx file

61 Views Asked by At

I have the below code to convert a .csv/txt file into a .mtx file, a txt file containing row names, and a txt file containing column names. However, when I run the code on a csv file, the output folder is empty. Where can the error be? Much appreciated.

#########################################################################################
# IMPORT DATA

## Gene names are stored in rows
## Sample names are stored in columns

#########################################################################################
# IMPORT LIBRARIES

library(data.table)
library(DropletUtils)
library(Matrix)

#########################################################################################
# DEFINE FUNCTIONS

clean_dt_colnames <- function(dt, clean_barcodes) {setnames(dt, base::colnames(dt), clean_barcodes)}

make_sample_barcode_tab <- function(dt, sample_regex = NA) {samp_bc <- colnames(dt)

  if (!is.na(sample_regex)) {
    sample_names <- gsub(sample_regex, "\\1", samp_bc)
    barcodes <- gsub(sample_regex, "\\2", samp_bc)

    clean_dt_colnames(dt, barcodes)
  } else {
    barcodes <- samp_bc
    sample_names <- rep_len("single_sample", length(barcodes))}

  # first var in dt is the gene_names var (data.tables don't have rownames)
  data.table(
    sample = sample_names[-1],
    barcode = barcodes[-1])}

list_barcodes_in_sample <- function(sample_barcode_tab) {
  # nest each barcode group to separate data.table
  nested_sample_dt <- sample_barcode_tab[, .(bc_list = list(.SD)), by = sample]

  # convert nested data table to list
  lapply(nested_sample_dt[["bc_list"]], unlist)}

sub_dt <- function(columns, dt) {
  # subset a data table by character vector, to ease lapply
  columns <- c("V1", columns)
  dt[, ..columns]}

export_demultiplexed_data <- function(sample_dt, sparse_matrix_list, data_dir) {

  nested_sample_dt <- sample_dt[, .(bc_list = list(.SD)), by = sample]

  for (row in 1:nrow(nested_sample_dt)) {
    fname <- file.path(data_dir, "out", nested_sample_dt[row][["sample"]])

    # unnest barcodes in sample
    expected_barcodes_in_sample <- nested_sample_dt[row, bc_list[[1]]][["barcode"]]

    if (!identical(expected_barcodes_in_sample, colnames(sparse_matrix_list[[row]]))) {
      stop("not the same barcodes")
    }

    DropletUtils::write10xCounts(fname,
      sparse_matrix_list[[row]],
      version = "3")}}

#########################################################################################
# DEFINE FILES & FOLDERS

data_dir <- "./"
setwd(data_dir)
csv_files <- list.files(data_dir, pattern = "*[ct]sv$")

print(csv_files)

output_dir <- file.path(data_dir, "out")
dir.create(output_dir)

#########################################################################################
# INSPECT DATA

csv_example <- count_data

# Look at the general structure of the matrix.
str(csv_example)

# print the column names, usually the barcodes
colnames(csv_example)

# print the first 20 rows of the first column (usually gene names)
head(csv_example[, 1], 20)

sample_regex <- NA

#########################################################################################
# PROCESS FILES

for (file in csv_files) {
  csv_table <- fread(file)
  setnames(csv_table, old = 1, new = "V1")

  sample_tab <- make_sample_barcode_tab(csv_table, sample_regex)

  gc()

  # subset the original count data.table, separating by samples if present
  dt_subset <- lapply(list_barcodes_in_sample(sample_tab), sub_dt, csv_table)
  rm(csv_table)
  gc()

  # convert each subsetted count data.table to count matrix
  counts <- lapply(dt_subset, as.matrix, rownames = "V1")
  rm(dt_subset)
  gc()

  # convert each count matrix to sparse matrices
  sparse_counts <- lapply(counts, Matrix, sparse = T)
  rm(counts)
  gc()

  # export the data to one folder per sample
  export_demultiplexed_data(sample_tab, sparse_counts, data_dir)}

(This code is adapted from the code in this link: https://www.biomage.net/blog/converting-csv-to-upload-to-cellenics)

0

There are 0 best solutions below