diff csaw.R @ 0:66356a1014b1 draft

Uploaded
author dktanwar
date Mon, 11 Dec 2017 10:03:32 -0500
parents
children ce3ad612a104
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/csaw.R	Mon Dec 11 10:03:32 2017 -0500
@@ -0,0 +1,82 @@
+## How to run tool
+# $ Rscript my_r_tool.R 
+#            --input1 input1.csv 
+#            --input2 input2.csv 
+#            --output1 output.csv 
+#            --output2 output2.csv
+
+# Setup R error handling to go to stderr
+options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
+# We need to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+
+library("csaw")
+library("stringr")
+library("data.table")
+library("getopt")
+
+
+options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
+# Take in trailing command line arguments
+
+output <- commandArgs(trailingOnly=TRUE)[2]
+inputs <- commandArgs(trailingOnly=TRUE)[1]
+
+print(output)
+print(inputs)
+
+# Separate multiple input files into a list of individual files
+files <- unlist(strsplit(inputs, ','))
+
+# Create windows and count reads in them ----
+Sys.time()
+windows <- windowCounts(files, spacing=150, width=200, bin=F)
+Sys.time()
+
+df <- data.frame(rowRanges(windows), stringsAsFactors = F)
+df <- df[,c(1:3)]
+
+file_names <- basename(data.frame(colData(windows))$bam.files)
+
+
+# Final table with all windows and read counts ----
+table <- data.frame(df, assay(windows), stringsAsFactors = F, check.names = F)
+colnames(table)[4:ncol(table)] <- file_names
+
+
+# Remove spaces in the table ----
+setDT(table)
+for (j in names(table)) set(table, j = j, value = table[[trimws(j)]]) 
+table_sp <- data.frame(table)
+
+# Save final table ----
+fwrite(x = table_sp, file = output, quote = F, row.names = F, sep = "\t")
+
+# # Save individual files ----
+# Sys.time()
+# r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
+# Sys.time()
+# # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
+# 
+# dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
+# dir.create(dir)
+# 
+# # cores <- detectCores()
+# # cl <- makeCluster(cores)
+# # registerDoParallel(cl)
+# 
+# tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
+# 
+# # foreach(i = 2:ncol(tab)) %dopar% {
+# for(i in 2:ncol(tab)){
+#   print(i)
+#   tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
+#   n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
+#   # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
+#   fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
+#   system(paste0("xz -3 -T 12 ", n))
+# }
+# # stopCluster(cl)
+
+sessionInfo()
\ No newline at end of file