Mercurial > repos > petr-novak > repeatrxplorer
comparison lib/tarean/tarean_batch_mode.R @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1d1b9e1b2e2f |
---|---|
1 #!/usr/bin/env Rscript | |
2 library(optparse, quiet = TRUE) | |
3 library(parallel) | |
4 initial.options <- commandArgs(trailingOnly = FALSE) | |
5 file.arg.name <- "--file=" | |
6 script.name <- sub(file.arg.name,"", | |
7 initial.options[grep(file.arg.name, initial.options)] | |
8 ) | |
9 script.dir <- normalizePath(dirname(script.name)) | |
10 oridir=getwd() | |
11 options(OGDF = paste0(script.dir,"/OGDF/runOGDFlayout2015.5")) | |
12 CPU = detectCores() | |
13 source(paste(script.dir,"/","methods.R", sep='')) | |
14 source(paste(script.dir,"/","logo_methods.R", sep='')) | |
15 source(paste(script.dir,"/","htmlheader.R", sep='')) | |
16 | |
17 option_list = list( | |
18 make_option(c('-i', '--input_sequences_list'), | |
19 action='store',type='character', | |
20 help='list of fasta sequences file for tarean analysis' | |
21 ), | |
22 make_option(c('-o', '--output_dir'), | |
23 action='store',type='character', | |
24 help='output directory', | |
25 default="./kmer_analysis"), | |
26 make_option(c('-t', '--tRNA_database'), | |
27 action='store',type='character', | |
28 help='path to tRNA database', | |
29 default=NULL), | |
30 make_option(c('-p', '--parallel'), | |
31 action='store_true', | |
32 type='logical', | |
33 help='run in parallel (faster but can exhaust RAM)', | |
34 default=FALSE), | |
35 make_option(c('-N', '--not_paired'), | |
36 action='store_true', | |
37 type='logical', | |
38 help='reads are not paired', | |
39 default=FALSE) | |
40 | |
41 ) | |
42 | |
43 description = paste (strwrap(" put decription here"), collapse ="\n") | |
44 epilogue = paste (strwrap(" put epilogue here"), collapse ="\n") | |
45 parser=OptionParser( | |
46 option_list=option_list, | |
47 epilogue=epilogue, | |
48 description=description, | |
49 ) | |
50 | |
51 opt = parse_args(parser, args=commandArgs(TRUE)) | |
52 paired = !opt$not_paired | |
53 print(opt) | |
54 dir.create(opt$output_dir) | |
55 fl = readLines(opt$input_sequences_list) | |
56 ## reorder to avoid running large top graphs at once | |
57 ord = sample(seq_along(fl), length(fl)) | |
58 | |
59 | |
60 index=0 | |
61 info=list() | |
62 save.image(paste0(opt$output_dir,"/info.RData")) # for debugin purposes | |
63 if (opt$parallel){ | |
64 cat("processing in parallel") | |
65 info=mcmapply( | |
66 FUN=tarean, | |
67 input_sequences = fl[ord], | |
68 output_dir = paste0(opt$output_dir,"/",sprintf("%04d",ord)), | |
69 min_kmer_length = 11, | |
70 max_kmer_length = 27, | |
71 CPU = CPU, | |
72 sample_size = 30000, | |
73 reorient_reads = TRUE, | |
74 tRNA_database_path = opt$tRNA_database, | |
75 paired = paired, | |
76 include_layout=FALSE, | |
77 mc.cores=round(1+detectCores()/9), | |
78 mc.set.seed = TRUE, | |
79 mc.preschedule = FALSE, | |
80 SIMPLIFY = FALSE | |
81 ) | |
82 }else{ | |
83 for (i in fl){ | |
84 index = index + 1 | |
85 dirout=paste0(opt$output_dir,"/",sprintf("%04d",index)) | |
86 try({ | |
87 info[[i]] = tarean(i, dirout, 11, 27, CPU, 30000, TRUE, opt$tRNA_database, include_layout=FALSE) | |
88 cat("-----------------------------------------------------\n") | |
89 print(info[[i]]) | |
90 }) | |
91 } | |
92 } | |
93 save(info, file = paste0(opt$output_dir,"/info.RData")) | |
94 save.image("tmp.RData") | |
95 ## export as csv table | |
96 ## 'graph_info' is always include: | |
97 | |
98 tr_info = data.frame(do.call(rbind, info[sapply(info,length)>1])) | |
99 if (nrow(tr_info)>0){ | |
100 ## TR detected | |
101 graph_info = data.frame (do.call(rbind, lapply(info, "[[", "graph_info"))) | |
102 graph_info$source=rownames(graph_info) | |
103 tr_info$graph_info=NULL | |
104 tr_info$source = rownames(tr_info) | |
105 graph_tr_info = merge(graph_info, tr_info, all=TRUE, by='source') | |
106 if (any(sapply(graph_tr_info,class)=='list')){ | |
107 for (i in colnames(graph_tr_info)){ | |
108 graph_tr_info[,i] = unname(unlist(graph_tr_info[,i])) | |
109 } | |
110 } | |
111 write.table(graph_tr_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote= TRUE) | |
112 }else{ | |
113 ## TR not detected | |
114 graph_info = data.frame (do.call(rbind, lapply(info, function(x) unlist(x[['graph_info']])))) | |
115 graph_info$source=rownames(graph_info) | |
116 write.table(graph_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote = FALSE) | |
117 } | |
118 | |
119 |