view csaw.R @ 8:100d5eb7bd4d draft

Uploaded
author dktanwar
date Mon, 18 Dec 2017 12:31:46 -0500
parents c2ec7fb35e97
children
line wrap: on
line source

## How to run tool
# $ Rscript my_r_tool.R 
#            --input1 input1.csv 
#            --input2 input2.csv 
#            --output1 output.csv 
#            --output2 output2.csv

# Setup R error handling to go to stderr
options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
# We need to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")


library("csaw")
library("stringr")
library("data.table")
library("getopt")
library("Rsamtools")


options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
# Take in trailing command line arguments

output <- commandArgs(trailingOnly=TRUE)[2]
inputs <- commandArgs(trailingOnly=TRUE)[1]

print(output)
print(inputs)

# Separate multiple input files into a list of individual files
files <- unlist(strsplit(inputs, ','))

# Index bamfiles
indexBam(files = files)

# Create windows and count reads in them ----
Sys.time()
windows <- windowCounts(files, spacing=150, width=200, bin=F)
Sys.time()

df <- data.frame(rowRanges(windows), stringsAsFactors = F)
df <- df[,c(1:3)]

file_names <- basename(data.frame(colData(windows))$bam.files)


# Final table with all windows and read counts ----
table <- data.frame(df, assay(windows), stringsAsFactors = F, check.names = F)
colnames(table)[4:ncol(table)] <- file_names

# Remove spaces in the table ----
setDT(table)
for (j in names(table)) set(table, j = j, value = table[[trimws(j)]]) 
table_sp <- data.frame(table)
  
# Save final table ----
fwrite(x = table_sp, file = output, quote = F, row.names = F, sep = "\t")
dt <- table[,regions:=paste0(seqnames,"-", start, "-", end)]
table_sp <- data.frame(dt)

for(i in 4:(ncol(table_sp)-1)){
  tmp <- table_sp[,c(ncol(table_sp), i)]
  n <- paste0(colnames(table_sp)[i],".tsv")
  fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
}

# # Save individual files ----
# Sys.time()
# r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
# Sys.time()
# # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
# 
# dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
# dir.create(dir)
# 
# # cores <- detectCores()
# # cl <- makeCluster(cores)
# # registerDoParallel(cl)
# 
# tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
# 
# # foreach(i = 2:ncol(tab)) %dopar% {
# for(i in 2:ncol(tab)){
#   print(i)
#   tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
#   n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
#   # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
#   fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
#   system(paste0("xz -3 -T 12 ", n))
# }
# # stopCluster(cl)

sessionInfo()