comparison csaw.R @ 0:66356a1014b1 draft

Uploaded
author dktanwar
date Mon, 11 Dec 2017 10:03:32 -0500
parents
children ce3ad612a104
comparison
equal deleted inserted replaced
-1:000000000000 0:66356a1014b1
## How to run tool
# $ Rscript csaw.R <bam1,bam2,...> <output.tsv>
#   argument 1: comma-separated list of input BAM files
#   argument 2: path of the tab-separated table to write
#
# The script counts reads in sliding windows across all input BAM files with
# csaw::windowCounts() and writes one table holding the window coordinates
# followed by one read-count column per input file.

# Setup R error handling to go to stderr
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", status = 1, runLast = FALSE)
  }
)
# We need to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")


library("csaw")
library("stringr")
library("data.table")
library("getopt")


# The option name is case sensitive: it must be spelled `stringsAsFactors`,
# otherwise R silently stores an unrelated option and nothing changes.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Take in trailing command line arguments ----
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop(
    "Usage: Rscript csaw.R <bam1,bam2,...> <output.tsv>\n",
    call. = FALSE
  )
}

inputs <- args[1]
output <- args[2]

print(output)
print(inputs)

# Separate multiple input files into a vector of individual files; empty
# entries (e.g. produced by a doubled comma) are dropped.
files <- unlist(strsplit(inputs, ",", fixed = TRUE))
files <- files[nzchar(files)]
if (length(files) == 0L) {
  stop("No input files were supplied in the first argument.\n", call. = FALSE)
}

# Create windows and count reads in them ----
# The bare Sys.time() calls are auto-printed by Rscript and bracket the
# counting step in the log.
Sys.time()
windows <- windowCounts(files, spacing = 150, width = 200, bin = FALSE)
Sys.time()

# Keep only the first three columns of the window ranges.
# NOTE(review): presumably seqnames, start and end - confirm against
# the GRanges -> data.frame conversion.
window_coords <- data.frame(rowRanges(windows), stringsAsFactors = FALSE)
window_coords <- window_coords[, 1:3]

file_names <- basename(colData(windows)$bam.files)


# Final table with all windows and read counts ----
count_table <- data.frame(
  window_coords,
  assay(windows),
  stringsAsFactors = FALSE,
  check.names = FALSE
)
colnames(count_table)[4:ncol(count_table)] <- file_names


# Remove spaces in the table ----
# Trim leading/trailing whitespace from the *values* of text columns. Numeric
# count columns are left untouched so they keep their type.
setDT(count_table)
for (j in names(count_table)) {
  column <- count_table[[j]]
  if (is.character(column) || is.factor(column)) {
    set(count_table, j = j, value = trimws(as.character(column)))
  }
}
# NOTE(review): data.frame() uses check.names = TRUE here, so column names that
# are not syntactic R names (e.g. file names containing "-" or starting with a
# digit) are rewritten by make.names() in the output header. Kept as-is because
# consumers may rely on the current header - confirm before changing.
table_sp <- data.frame(count_table)

# Save final table ----
fwrite(
  x = table_sp,
  file = output,
  quote = FALSE,
  row.names = FALSE,
  sep = "\t"
)

# Disabled: per-sample export. It refers to `opt$outdir`, which is not defined
# anywhere in this script, so it cannot be re-enabled without further changes.
# # Save individual files ----
# Sys.time()
# r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
# Sys.time()
# # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
#
# dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
# dir.create(dir)
#
# # cores <- detectCores()
# # cl <- makeCluster(cores)
# # registerDoParallel(cl)
#
# tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
#
# # foreach(i = 2:ncol(tab)) %dopar% {
# for(i in 2:ncol(tab)){
#   print(i)
#   tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
#   n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
#   # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
#   fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
#   system(paste0("xz -3 -T 12 ", n))
# }
# # stopCluster(cl)

# Record package versions in the tool log.
sessionInfo()