diff lib/tarean/tarean_batch_mode.R @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/tarean/tarean_batch_mode.R	Thu Dec 19 10:24:45 2019 -0500
@@ -0,0 +1,119 @@
+#!/usr/bin/env Rscript
+library(optparse, quiet = TRUE)
+library(parallel)
+initial.options <- commandArgs(trailingOnly = FALSE)
+file.arg.name <- "--file="
+script.name <- sub(file.arg.name,"",
+                   initial.options[grep(file.arg.name, initial.options)]
+)
+script.dir <- normalizePath(dirname(script.name))
+oridir=getwd()
+options(OGDF = paste0(script.dir,"/OGDF/runOGDFlayout2015.5"))
+CPU =  detectCores()
+source(paste(script.dir,"/","methods.R", sep=''))
+source(paste(script.dir,"/","logo_methods.R", sep=''))
+source(paste(script.dir,"/","htmlheader.R", sep=''))
+
+option_list = list(
+    make_option(c('-i', '--input_sequences_list'),
+                action='store',type='character',
+                help='list of fasta sequences file for tarean analysis'
+                ),
+    make_option(c('-o', '--output_dir'),
+                action='store',type='character',
+                help='output directory',
+                default="./kmer_analysis"),
+    make_option(c('-t', '--tRNA_database'),
+                action='store',type='character',
+                help='path to tRNA database',
+                default=NULL),
+    make_option(c('-p', '--parallel'),
+                action='store_true',
+                type='logical',
+                help='run in parallel (faster but can exhaust RAM)',
+                default=FALSE),
+    make_option(c('-N', '--not_paired'),
+                action='store_true',
+                type='logical',
+                help='reads are not paired',
+                default=FALSE)
+
+    )
+
+description = paste (strwrap(" put decription here"), collapse ="\n")
+epilogue = paste (strwrap(" put epilogue here"), collapse ="\n")
+parser=OptionParser(
+    option_list=option_list,
+    epilogue=epilogue,
+    description=description,
+    )
+
+opt = parse_args(parser, args=commandArgs(TRUE))
+paired = !opt$not_paired
+print(opt)
+dir.create(opt$output_dir)
+fl = readLines(opt$input_sequences_list)
+## reorder to avoid running large top graphs at once
+ord = sample(seq_along(fl), length(fl))
+
+
+index=0
+info=list()
+save.image(paste0(opt$output_dir,"/info.RData")) # for debugin purposes
+if (opt$parallel){
+    cat("processing in parallel")
+    info=mcmapply(
+        FUN=tarean,
+        input_sequences = fl[ord],
+        output_dir = paste0(opt$output_dir,"/",sprintf("%04d",ord)),
+        min_kmer_length = 11,
+        max_kmer_length = 27,
+        CPU = CPU,
+        sample_size = 30000,
+        reorient_reads = TRUE,
+        tRNA_database_path = opt$tRNA_database,
+        paired = paired,
+        include_layout=FALSE,
+        mc.cores=round(1+detectCores()/9),
+        mc.set.seed = TRUE,
+        mc.preschedule = FALSE,
+        SIMPLIFY = FALSE
+    )
+}else{
+    for (i in fl){
+        index = index + 1
+        dirout=paste0(opt$output_dir,"/",sprintf("%04d",index))
+        try({
+            info[[i]] = tarean(i, dirout, 11, 27, CPU, 30000, TRUE, opt$tRNA_database, include_layout=FALSE)
+            cat("-----------------------------------------------------\n")
+            print(info[[i]])
+        })
+    }
+}
+save(info, file = paste0(opt$output_dir,"/info.RData"))
+save.image("tmp.RData")
+## export as csv table
+## 'graph_info' is always include:
+
+tr_info = data.frame(do.call(rbind, info[sapply(info,length)>1]))
+if (nrow(tr_info)>0){
+    ## TR detected
+    graph_info = data.frame (do.call(rbind, lapply(info, "[[", "graph_info")))
+    graph_info$source=rownames(graph_info)
+    tr_info$graph_info=NULL
+    tr_info$source = rownames(tr_info)
+    graph_tr_info = merge(graph_info, tr_info, all=TRUE, by='source')
+    if (any(sapply(graph_tr_info,class)=='list')){
+        for (i in colnames(graph_tr_info)){
+            graph_tr_info[,i] = unname(unlist(graph_tr_info[,i]))
+        }
+    }
+    write.table(graph_tr_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote= TRUE)
+}else{
+    ## TR not detected
+    graph_info = data.frame (do.call(rbind, lapply(info, function(x) unlist(x[['graph_info']]))))
+    graph_info$source=rownames(graph_info)
+    write.table(graph_info, file=paste0(opt$output_dir,"/info.csv"), row.names=FALSE,sep="\t", quote = FALSE)
+}
+
+