annotate csaw.R @ 6:ee07a679ac08 draft

Uploaded
author dktanwar
date Mon, 18 Dec 2017 12:10:21 -0500
parents aa29b20bbb45
children c2ec7fb35e97
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
1 ## How to run tool
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
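2 # $ Rscript csaw.R <inputs> <output>
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
3 #   <inputs>  1st argument: comma-separated list of input BAM files
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
4 #   <output>  2nd argument: path of the tab-separated table of window counts
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
5 # One additional two-column table (regions, counts) per count column is
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
6 # written to a file named after that column.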
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
7
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
8 # Setup R error handling to go to stderr
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
9 options(show.error.messages = FALSE, error = function() { cat(geterrmessage(), file = stderr()); quit(save = "no", status = 1, runLast = FALSE) })
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
10 # Force English messages so that Galaxy does not crash with a UTF-8 error under German LC settings.
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
11 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
12
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
13
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
14 library("csaw")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
15 library("stringr")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
16 library("data.table")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
17 library("getopt")
3
ce3ad612a104 Uploaded
dktanwar
parents: 0
diff changeset
18 library("Rsamtools")
0
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
19
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
20
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
21 options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)  # option names are case-sensitive: a misspelled name only sets an unused option
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
22 # Take in trailing command line arguments
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
23
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
24 output <- commandArgs(trailingOnly=TRUE)[2]
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
25 inputs <- commandArgs(trailingOnly=TRUE)[1]
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
26
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
27 print(output)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
28 print(inputs)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
29
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
30 # Split the comma-separated input argument into a character vector of individual file paths
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
31 files <- unlist(strsplit(inputs, ','))
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
32
3
ce3ad612a104 Uploaded
dktanwar
parents: 0
diff changeset
33 # Index bamfiles
ce3ad612a104 Uploaded
dktanwar
parents: 0
diff changeset
34 indexBam(files = files)
ce3ad612a104 Uploaded
dktanwar
parents: 0
diff changeset
35
0
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
36 # Create windows and count reads in them ----
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
37 Sys.time()
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
38 windows <- windowCounts(files, spacing=150, width=200, bin=F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
39 Sys.time()
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
40
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
41 df <- data.frame(rowRanges(windows), stringsAsFactors = F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
42 df <- df[,c(1:3)]
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
43
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
44 file_names <- basename(data.frame(colData(windows))$bam.files)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
45
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
46
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
47 # Final table with all windows and read counts ----
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
48 table <- data.frame(df, assay(windows), stringsAsFactors = F, check.names = F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
49 colnames(table)[4:ncol(table)] <- file_names
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
50
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
51 # Remove spaces in the table ----
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
52 setDT(table)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
53 for (j in names(table)) set(table, j = j, value = table[[trimws(j)]])  # NOTE(review): trimws() is applied to the column name j, not to the column values, so each column is re-assigned from the column looked up by its trimmed name and no cell content is trimmed -- confirm intent
6
ee07a679ac08 Uploaded
dktanwar
parents: 5
diff changeset
54 table_sp <- data.frame(table)
ee07a679ac08 Uploaded
dktanwar
parents: 5
diff changeset
55
0
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
56 # Save final table ----
6
ee07a679ac08 Uploaded
dktanwar
parents: 5
diff changeset
57 fwrite(x = table_sp, file = output, quote = F, row.names = F, sep = "\t")
5
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
58 dt <- table[,regions:=paste0(seqnames,"-", start, "-", end)]
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
59 table_sp <- data.frame(dt)
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
60
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
61 for(i in 4:(ncol(table_sp)-1)){
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
62 tmp <- table_sp[,c(ncol(table_sp), i)]
6
ee07a679ac08 Uploaded
dktanwar
parents: 5
diff changeset
63 n <- colnames(table_sp)[i]
ee07a679ac08 Uploaded
dktanwar
parents: 5
diff changeset
64 fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
5
aa29b20bbb45 Uploaded
dktanwar
parents: 3
diff changeset
65 }
0
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
66
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
67 # # Save individual files ----
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
68 # Sys.time()
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
69 # r <- paste(table_sp[,1], table_sp[,2], table_sp[,3], sep = "-")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
70 # Sys.time()
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
71 # # r <- apply( table_sp[ ,c(1:3)] , 1 , paste , sep = "-" )
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
72 #
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
73 # dir <- paste(opt$outdir, "counts_each_sample", sep = "/")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
74 # dir.create(dir)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
75 #
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
76 # # cores <- detectCores()
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
77 # # cl <- makeCluster(cores)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
78 # # registerDoParallel(cl)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
79 #
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
80 # tab <- data.frame(regions = r, table_sp[,4:ncol(table_sp)], stringsAsFactors = F, check.names = F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
81 #
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
82 # # foreach(i = 2:ncol(tab)) %dopar% {
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
83 # for(i in 2:ncol(tab)){
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
84 # print(i)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
85 # tmp <- data.frame(tab[,c(1,i)], stringsAsFactors = F, check.names = F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
86 # n <- paste(dir, "/", colnames(tab)[i], ".txt", sep = "")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
87 # # write.table(tmp, xzfile(paste(dir, "/", n, ".txt.xz", sep = "")), sep = "\t", quote = F, row.names = F)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
88 # fwrite(x = tmp, file = n, quote = F, row.names = F, sep = "\t")
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
89 # system(paste0("xz -3 -T 12 ", n))
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
90 # }
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
91 # # stopCluster(cl)
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
92
66356a1014b1 Uploaded
dktanwar
parents:
diff changeset
93 sessionInfo()