0
|
1 #!/local/gensoft2/exe/R/3.1.2/bin/Rscript
|
|
2
|
|
3 # to run this script, use one of these commands:
|
|
4 # Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1
|
|
5 # Rscript template_script_DESeq2_CL.r -r raw -v group -c T0
|
|
6
|
|
7 # to get help:
|
|
8 # Rscript template_script_DESeq2_CL.r --help
|
|
9
|
|
10 ################################################################################
|
|
11 ### R script to compare several conditions with the SARTools and DESeq2 packages
|
|
12 ### Hugo Varet
|
|
13 ### April 20th, 2015
|
|
14 ### designed to be executed with SARTools 1.1.0
|
|
15 ################################################################################
|
|
16
|
|
17 rm(list=ls()) # remove all the objects from the R session
|
|
18 library(optparse) # to run the script in command lines
|
|
19
|
|
20 # options list with associated default value.
|
|
21 option_list <- list(
|
|
22 make_option(c("-P", "--projectName"),
|
|
23 default=basename(getwd()),
|
|
24 dest="projectName",
|
|
25 help="name of the project used for the report [default: name of the current directory]."),
|
|
26
|
|
27 make_option(c("-A", "--author"),
|
|
28 default=Sys.info()[7],
|
|
29 dest="author",
|
|
30 help="name of the report author [default: %default]."),
|
|
31
|
|
32 make_option(c("-t", "--targetFile"),
|
|
33 default="target.txt",
|
|
34 dest="targetFile",
|
|
35 help="path to the design/target file [default: %default]."),
|
|
36
|
|
37 make_option(c("-r", "--rawDir"),
|
|
38 default="raw",
|
|
39 dest="rawDir",
|
|
40 help="path to the directory containing the HTSeq files [default: %default]."),
|
|
41
|
|
42 make_option(c("-F", "--featuresToRemove"),
|
|
43 default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
|
|
44 dest="FTR",
|
|
45 help="names of the features to be removed, more than once can be specified [default: %default]"),
|
|
46
|
|
47 make_option(c("-v", "--varInt"),
|
|
48 default="group",
|
|
49 dest="varInt",
|
|
50 help="factor of interest [default: %default]"),
|
|
51
|
|
52 make_option(c("-c", "--condRef"),
|
|
53 default="WT",
|
|
54 dest="condRef",
|
|
55 help="reference biological condition [default: %default]"),
|
|
56
|
|
57 make_option(c("-b", "--batch"),
|
|
58 default=NULL,
|
|
59 dest="batch",
|
|
60 help="blocking factor [default: %default] or \"batch\" for example"),
|
|
61
|
|
62 make_option(c("-f", "--fitType"),
|
|
63 default="parametric",
|
|
64 dest="fitType",
|
|
65 help="mean-variance relationship: [default: %default] or local"),
|
|
66
|
|
67 make_option(c("-o", "--cooksCutoff"),
|
|
68 default=TRUE,
|
|
69 dest="cooksCutoff",
|
|
70 help="perform the outliers detection (default is TRUE)"),
|
|
71
|
|
72 make_option(c("-i", "--independentFiltering"),
|
|
73 default=TRUE,
|
|
74 dest="independentFiltering",
|
|
75 help="perform independent filtering (default is TRUE)"),
|
|
76
|
|
77 make_option(c("-a", "--alpha"),
|
|
78 default=0.05,
|
|
79 dest="alpha",
|
|
80 help="threshold of statistical significance [default: %default]"),
|
|
81
|
|
82 make_option(c("-p", "--pAdjustMethod"),
|
|
83 default="BH",
|
|
84 dest="pAdjustMethod",
|
|
85 help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),
|
|
86
|
|
87 make_option(c("-T", "--typeTrans"),
|
|
88 default="VST",
|
|
89 dest="typeTrans",
|
|
90 help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"),
|
|
91
|
|
92 make_option(c("-l", "--locfunc"),
|
|
93 default="median",
|
|
94 dest="locfunc",
|
|
95 help="median or shorth to estimate the size factors [default: %default]"),
|
|
96
|
|
97 make_option(c("-C", "--colors"),
|
|
98 default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
|
|
99 dest="cols",
|
|
100 help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]")
|
|
101 )
|
|
102
|
|
103 # now parse the command line to check which option is given and get associated values
|
|
104 parser <- OptionParser(usage="usage: %prog [options]",
|
|
105 option_list=option_list,
|
|
106 description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.",
|
|
107 epilogue="For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>")
|
|
108 opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options
|
|
109
|
|
110 # get options and arguments
|
|
111 workDir <- getwd()
|
|
112 projectName <- opt$projectName # name of the project
|
|
113 author <- opt$author # author of the statistical analysis/report
|
|
114 targetFile <- opt$targetFile # path to the design/target file
|
|
115 rawDir <- opt$rawDir # path to the directory containing raw counts files
|
|
116 featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example)
|
|
117 varInt <- opt$varInt # factor of interest
|
|
118 condRef <- opt$condRef # reference biological condition
|
|
119 batch <- opt$batch # blocking factor: NULL (default) or "batch" for example
|
|
120 fitType <- opt$fitType # mean-variance relationship: "parametric" (default) or "local"
|
|
121 cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it)
|
|
122 independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE)
|
|
123 alpha <- as.numeric(opt$alpha) # threshold of statistical significance
|
|
124 pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY"
|
|
125 typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog"
|
|
126 locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors
|
|
127 colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots
|
|
128
|
|
129 # print(paste("workDir", workDir))
|
|
130 # print(paste("projectName", projectName))
|
|
131 # print(paste("author", author))
|
|
132 # print(paste("targetFile", targetFile))
|
|
133 # print(paste("rawDir", rawDir))
|
|
134 # print(paste("varInt", varInt))
|
|
135 # print(paste("condRef", condRef))
|
|
136 # print(paste("batch", batch))
|
|
137 # print(paste("fitType", fitType))
|
|
138 # print(paste("cooksCutoff", cooksCutoff))
|
|
139 # print(paste("independentFiltering", independentFiltering))
|
|
140 # print(paste("alpha", alpha))
|
|
141 # print(paste("pAdjustMethod", pAdjustMethod))
|
|
142 # print(paste("typeTrans", typeTrans))
|
|
143 # print(paste("locfunc", locfunc))
|
|
144 # print(paste("featuresToRemove", featuresToRemove))
|
|
145 # print(paste("colors", colors))
|
|
146
|
|
147 ################################################################################
|
|
148 ### running script ###
|
|
149 ################################################################################
|
|
150 # setwd(workDir)
|
|
151 library(SARTools)
|
|
152
|
|
153 # checking parameters
|
|
154 problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile,
|
|
155 rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt,
|
|
156 condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff,
|
|
157 independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod,
|
|
158 typeTrans=typeTrans,locfunc=locfunc,colors=colors)
|
|
159 if (problem) quit(save="yes")
|
|
160
|
|
161 # loading target file
|
|
162 target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch)
|
|
163
|
|
164 # loading counts
|
|
165 counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove)
|
|
166
|
|
167 # description plots
|
|
168 majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors)
|
|
169
|
|
170 # analysis with DESeq2
|
|
171 out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch,
|
|
172 locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod,
|
|
173 cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha)
|
|
174
|
|
175 # PCA + clustering
|
|
176 exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors)
|
|
177
|
|
178 # summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot)
|
|
179 summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors,
|
|
180 independentFiltering=independentFiltering,
|
|
181 cooksCutoff=cooksCutoff, alpha=alpha)
|
|
182
|
|
183 # save image of the R session
|
|
184 save.image(file=paste0(projectName, ".RData"))
|
|
185
|
|
186 # generating HTML report
|
|
187 writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults,
|
|
188 majSequences=majSequences, workDir=workDir, projectName=projectName, author=author,
|
|
189 targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt,
|
|
190 condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff,
|
|
191 independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod,
|
|
192 typeTrans=typeTrans, locfunc=locfunc, colors=colors)
|