Mercurial > repos > lgueguen > sartools
view template_script_DESeq2_CL.r @ 0:581d217c7337 draft
Planemo upload
author | lgueguen |
---|---|
date | Fri, 22 Jul 2016 05:39:13 -0400 |
parents | |
children | de6d0b7c17af |
line wrap: on
line source
#!/local/gensoft2/exe/R/3.1.2/bin/Rscript # to run this script, use one of these commands: # Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1 # Rscript template_script_DESeq2_CL.r -r raw -v group -c T0 # to get help: # Rscript template_script_DESeq2_CL.r --help ################################################################################ ### R script to compare several conditions with the SARTools and DESeq2 packages ### Hugo Varet ### April 20th, 2015 ### designed to be executed with SARTools 1.1.0 ################################################################################ rm(list=ls()) # remove all the objects from the R session library(optparse) # to run the script in command lines # options list with associated default value. option_list <- list( make_option(c("-P", "--projectName"), default=basename(getwd()), dest="projectName", help="name of the project used for the report [default: name of the current directory]."), make_option(c("-A", "--author"), default=Sys.info()[7], dest="author", help="name of the report author [default: %default]."), make_option(c("-t", "--targetFile"), default="target.txt", dest="targetFile", help="path to the design/target file [default: %default]."), make_option(c("-r", "--rawDir"), default="raw", dest="rawDir", help="path to the directory containing the HTSeq files [default: %default]."), make_option(c("-F", "--featuresToRemove"), default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual", dest="FTR", help="names of the features to be removed, more than once can be specified [default: %default]"), make_option(c("-v", "--varInt"), default="group", dest="varInt", help="factor of interest [default: %default]"), make_option(c("-c", "--condRef"), default="WT", dest="condRef", help="reference biological condition [default: %default]"), make_option(c("-b", "--batch"), default=NULL, dest="batch", help="blocking factor [default: %default] or \"batch\" for example"), make_option(c("-f", "--fitType"), default="parametric", dest="fitType", help="mean-variance relationship: [default: %default] or local"), make_option(c("-o", "--cooksCutoff"), default=TRUE, dest="cooksCutoff", help="perform the outliers detection (default is TRUE)"), make_option(c("-i", "--independentFiltering"), default=TRUE, dest="independentFiltering", help="perform independent filtering (default is TRUE)"), make_option(c("-a", "--alpha"), default=0.05, dest="alpha", help="threshold of statistical significance [default: %default]"), make_option(c("-p", "--pAdjustMethod"), default="BH", dest="pAdjustMethod", help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"), make_option(c("-T", "--typeTrans"), default="VST", dest="typeTrans", help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"), make_option(c("-l", "--locfunc"), default="median", dest="locfunc", help="median or shorth to estimate the size factors [default: %default]"), make_option(c("-C", "--colors"), default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange", dest="cols", help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]") ) # now parse the command line to check which option is given and get associated values parser <- OptionParser(usage="usage: %prog [options]", option_list=option_list, description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.", epilogue="For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>") opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options # get options and arguments workDir <- getwd() projectName <- opt$projectName # name of the project author <- opt$author # author of the statistical analysis/report targetFile <- opt$targetFile # path to the design/target file rawDir <- opt$rawDir # path to the directory containing raw counts files featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example) varInt <- opt$varInt # factor of interest condRef <- opt$condRef # reference biological condition batch <- opt$batch # blocking factor: NULL (default) or "batch" for example fitType <- opt$fitType # mean-variance relationship: "parametric" (default) or "local" cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it) independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE) alpha <- as.numeric(opt$alpha) # threshold of statistical significance pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY" typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog" locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots # print(paste("workDir", workDir)) # print(paste("projectName", projectName)) # print(paste("author", author)) # print(paste("targetFile", targetFile)) # print(paste("rawDir", rawDir)) # print(paste("varInt", varInt)) # print(paste("condRef", condRef)) # print(paste("batch", batch)) # print(paste("fitType", fitType)) # print(paste("cooksCutoff", cooksCutoff)) # print(paste("independentFiltering", independentFiltering)) # print(paste("alpha", alpha)) # print(paste("pAdjustMethod", pAdjustMethod)) # print(paste("typeTrans", typeTrans)) # print(paste("locfunc", locfunc)) # print(paste("featuresToRemove", featuresToRemove)) # print(paste("colors", colors)) ################################################################################ ### running script ### ################################################################################ # setwd(workDir) library(SARTools) # checking parameters problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile, rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt, condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff, independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod, typeTrans=typeTrans,locfunc=locfunc,colors=colors) if (problem) quit(save="yes") # loading target file target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch) # loading counts counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove) # description plots majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors) # analysis with DESeq2 out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch, locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod, cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha) # PCA + clustering exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors) # summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot) summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors, independentFiltering=independentFiltering, cooksCutoff=cooksCutoff, alpha=alpha) # save image of the R session save.image(file=paste0(projectName, ".RData")) # generating HTML report writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults, majSequences=majSequences, workDir=workDir, projectName=projectName, author=author, targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt, condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod, typeTrans=typeTrans, locfunc=locfunc, colors=colors)