# HG changeset patch
# User dktanwar
# Date 1513004612 18000
# Node ID 66356a1014b1c1b2eabea2f5af097a22bbdc760b
# Uploaded
# Script recovered from: diff -r 000000000000 -r 66356a1014b1 csaw.R
#   (b/csaw.R, Mon Dec 11 10:03:32 2017 -0500)

## Count reads in sliding windows across one or more BAM files (csaw) and
## write a single tab-separated table: one row per window (the first three
## columns of the window ranges), followed by one count column per input file.
##
## How to run tool
# $ Rscript csaw.R INPUTS OUTPUT
#   INPUTS  comma-separated list of input files (first trailing argument)
#   OUTPUT  path of the tab-separated table to write (second trailing argument)

# Setup R error handling to go to stderr
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
# We need to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")


library("csaw")
library("stringr")
library("data.table")
library("getopt")


# NOTE: the option is spelled `stringsAsFactors`; the previous spelling
# (`stringAsfactors`) set an unused option and had no effect.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Take in trailing command line arguments ----
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 2L || anyNA(cli_args[1:2]) ||
      !all(nzchar(cli_args[1:2]))) {
  # Fail early with a clear message instead of an obscure error further down.
  stop(
    "Usage: Rscript csaw.R INPUT1[,INPUT2,...] OUTPUT\n",
    call. = FALSE
  )
}
inputs <- cli_args[1]
output <- cli_args[2]

print(output)
print(inputs)

# Separate multiple input files into a list of individual files
files <- unlist(strsplit(inputs, ",", fixed = TRUE))

# Create windows and count reads in them ----
Sys.time()
windows <- windowCounts(files, spacing = 150, width = 200, bin = FALSE)
Sys.time()

# Keep only the first three columns of the window ranges.
regions <- data.frame(rowRanges(windows), stringsAsFactors = FALSE)
regions <- regions[, 1:3]

# Column labels for the counts: the input file names without their directory.
# as.character() guards against a factor, which basename() would reject.
file_names <- basename(as.character(colData(windows)$bam.files))


# Final table with all windows and read counts ----
counts_table <- data.frame(
  regions,
  assay(windows),
  stringsAsFactors = FALSE,
  check.names = FALSE
)
colnames(counts_table) <- c(colnames(regions), file_names)


# Remove spaces in the table ----
# Trim surrounding whitespace from the *values* of text columns. The previous
# loop trimmed the column *name* used for the lookup instead, which left the
# values untouched and dropped any column whose name carried whitespace.
setDT(counts_table)
for (col in names(counts_table)) {
  values <- counts_table[[col]]
  if (is.character(values) || is.factor(values)) {
    set(counts_table, j = col, value = trimws(as.character(values)))
  }
}

# Save final table ----
# Write the data.table directly: converting it back with data.frame() would
# re-apply check.names = TRUE and mangle file names in the header.
fwrite(
  x = counts_table,
  file = output,
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)

# # Save individual files ----
# Sys.time()
# r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
# Sys.time()
# # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
#
# dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
# dir.create(dir)
#
# # cores <- detectCores()
# # cl <- makeCluster(cores)
# # registerDoParallel(cl)
#
# tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
#
# # foreach(i = 2:ncol(tab)) %dopar% {
# for(i in 2:ncol(tab)){
#   print(i)
#   tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
#   n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
#   # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
#   fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
#   system(paste0("xz -3 -T 12 ", n))
# }
# # stopCluster(cl)

sessionInfo()