Mercurial > repos > lgueguen > sartools
comparison template_script_DESeq2_CL.r @ 0:581d217c7337 draft
Planemo upload
author | lgueguen |
---|---|
date | Fri, 22 Jul 2016 05:39:13 -0400 |
parents | |
children | de6d0b7c17af |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:581d217c7337 |
---|---|
1 #!/local/gensoft2/exe/R/3.1.2/bin/Rscript | |
2 | |
3 # to run this script, use one of these commands: | |
4 # Rscript --no-save --no-restore --verbose template_script_DESeq2_CL.r -r raw -v group -c T0 > log.txt 2>&1 | |
5 # Rscript template_script_DESeq2_CL.r -r raw -v group -c T0 | |
6 | |
7 # to get help: | |
8 # Rscript template_script_DESeq2_CL.r --help | |
9 | |
10 ################################################################################ | |
11 ### R script to compare several conditions with the SARTools and DESeq2 packages | |
12 ### Hugo Varet | |
13 ### April 20th, 2015 | |
14 ### designed to be executed with SARTools 1.1.0 | |
15 ################################################################################ | |
16 | |
17 rm(list=ls()) # remove all the objects from the R session | |
18 library(optparse) # to run the script in command lines | |
19 | |
20 # options list with associated default value. | |
21 option_list <- list( | |
22 make_option(c("-P", "--projectName"), | |
23 default=basename(getwd()), | |
24 dest="projectName", | |
25 help="name of the project used for the report [default: name of the current directory]."), | |
26 | |
27 make_option(c("-A", "--author"), | |
28 default=Sys.info()[7], | |
29 dest="author", | |
30 help="name of the report author [default: %default]."), | |
31 | |
32 make_option(c("-t", "--targetFile"), | |
33 default="target.txt", | |
34 dest="targetFile", | |
35 help="path to the design/target file [default: %default]."), | |
36 | |
37 make_option(c("-r", "--rawDir"), | |
38 default="raw", | |
39 dest="rawDir", | |
40 help="path to the directory containing the HTSeq files [default: %default]."), | |
41 | |
42 make_option(c("-F", "--featuresToRemove"), | |
43 default="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual", | |
44 dest="FTR", | |
45 help="names of the features to be removed, more than once can be specified [default: %default]"), | |
46 | |
47 make_option(c("-v", "--varInt"), | |
48 default="group", | |
49 dest="varInt", | |
50 help="factor of interest [default: %default]"), | |
51 | |
52 make_option(c("-c", "--condRef"), | |
53 default="WT", | |
54 dest="condRef", | |
55 help="reference biological condition [default: %default]"), | |
56 | |
57 make_option(c("-b", "--batch"), | |
58 default=NULL, | |
59 dest="batch", | |
60 help="blocking factor [default: %default] or \"batch\" for example"), | |
61 | |
62 make_option(c("-f", "--fitType"), | |
63 default="parametric", | |
64 dest="fitType", | |
65 help="mean-variance relationship: [default: %default] or local"), | |
66 | |
67 make_option(c("-o", "--cooksCutoff"), | |
68 default=TRUE, | |
69 dest="cooksCutoff", | |
70 help="perform the outliers detection (default is TRUE)"), | |
71 | |
72 make_option(c("-i", "--independentFiltering"), | |
73 default=TRUE, | |
74 dest="independentFiltering", | |
75 help="perform independent filtering (default is TRUE)"), | |
76 | |
77 make_option(c("-a", "--alpha"), | |
78 default=0.05, | |
79 dest="alpha", | |
80 help="threshold of statistical significance [default: %default]"), | |
81 | |
82 make_option(c("-p", "--pAdjustMethod"), | |
83 default="BH", | |
84 dest="pAdjustMethod", | |
85 help="p-value adjustment method: \"BH\" or \"BY\" [default: %default]"), | |
86 | |
87 make_option(c("-T", "--typeTrans"), | |
88 default="VST", | |
89 dest="typeTrans", | |
90 help="transformation for PCA/clustering: \"VST\" ou \"rlog\" [default: %default]"), | |
91 | |
92 make_option(c("-l", "--locfunc"), | |
93 default="median", | |
94 dest="locfunc", | |
95 help="median or shorth to estimate the size factors [default: %default]"), | |
96 | |
97 make_option(c("-C", "--colors"), | |
98 default="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange", | |
99 dest="cols", | |
100 help="colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]") | |
101 ) | |
102 | |
103 # now parse the command line to check which option is given and get associated values | |
104 parser <- OptionParser(usage="usage: %prog [options]", | |
105 option_list=option_list, | |
106 description="Compare two or more biological conditions in a RNA-Seq framework with DESeq2.", | |
107 epilogue="For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>") | |
108 opt <- parse_args(parser, args=commandArgs(trailingOnly=TRUE), positional_arguments=0)$options | |
109 | |
110 # get options and arguments | |
111 workDir <- getwd() | |
112 projectName <- opt$projectName # name of the project | |
113 author <- opt$author # author of the statistical analysis/report | |
114 targetFile <- opt$targetFile # path to the design/target file | |
115 rawDir <- opt$rawDir # path to the directory containing raw counts files | |
116 featuresToRemove <- unlist(strsplit(opt$FTR, ",")) # names of the features to be removed (specific HTSeq-count information and rRNA for example) | |
117 varInt <- opt$varInt # factor of interest | |
118 condRef <- opt$condRef # reference biological condition | |
119 batch <- opt$batch # blocking factor: NULL (default) or "batch" for example | |
120 fitType <- opt$fitType # mean-variance relationship: "parametric" (default) or "local" | |
121 cooksCutoff <- opt$cooksCutoff # outliers detection threshold (NULL to let DESeq2 choosing it) | |
122 independentFiltering <- opt$independentFiltering # TRUE/FALSE to perform independent filtering (default is TRUE) | |
123 alpha <- as.numeric(opt$alpha) # threshold of statistical significance | |
124 pAdjustMethod <- opt$pAdjustMethod # p-value adjustment method: "BH" (default) or "BY" | |
125 typeTrans <- opt$typeTrans # transformation for PCA/clustering: "VST" ou "rlog" | |
126 locfunc <- opt$locfunc # "median" (default) or "shorth" to estimate the size factors | |
127 colors <- unlist(strsplit(opt$cols, ",")) # vector of colors of each biologicial condition on the plots | |
128 | |
129 # print(paste("workDir", workDir)) | |
130 # print(paste("projectName", projectName)) | |
131 # print(paste("author", author)) | |
132 # print(paste("targetFile", targetFile)) | |
133 # print(paste("rawDir", rawDir)) | |
134 # print(paste("varInt", varInt)) | |
135 # print(paste("condRef", condRef)) | |
136 # print(paste("batch", batch)) | |
137 # print(paste("fitType", fitType)) | |
138 # print(paste("cooksCutoff", cooksCutoff)) | |
139 # print(paste("independentFiltering", independentFiltering)) | |
140 # print(paste("alpha", alpha)) | |
141 # print(paste("pAdjustMethod", pAdjustMethod)) | |
142 # print(paste("typeTrans", typeTrans)) | |
143 # print(paste("locfunc", locfunc)) | |
144 # print(paste("featuresToRemove", featuresToRemove)) | |
145 # print(paste("colors", colors)) | |
146 | |
147 ################################################################################ | |
148 ### running script ### | |
149 ################################################################################ | |
150 # setwd(workDir) | |
151 library(SARTools) | |
152 | |
153 # checking parameters | |
154 problem <- checkParameters.DESeq2(projectName=projectName,author=author,targetFile=targetFile, | |
155 rawDir=rawDir,featuresToRemove=featuresToRemove,varInt=varInt, | |
156 condRef=condRef,batch=batch,fitType=fitType,cooksCutoff=cooksCutoff, | |
157 independentFiltering=independentFiltering,alpha=alpha,pAdjustMethod=pAdjustMethod, | |
158 typeTrans=typeTrans,locfunc=locfunc,colors=colors) | |
159 if (problem) quit(save="yes") | |
160 | |
161 # loading target file | |
162 target <- loadTargetFile(targetFile=targetFile, varInt=varInt, condRef=condRef, batch=batch) | |
163 | |
164 # loading counts | |
165 counts <- loadCountData(target=target, rawDir=rawDir, featuresToRemove=featuresToRemove) | |
166 | |
167 # description plots | |
168 majSequences <- descriptionPlots(counts=counts, group=target[,varInt], col=colors) | |
169 | |
170 # analysis with DESeq2 | |
171 out.DESeq2 <- run.DESeq2(counts=counts, target=target, varInt=varInt, batch=batch, | |
172 locfunc=locfunc, fitType=fitType, pAdjustMethod=pAdjustMethod, | |
173 cooksCutoff=cooksCutoff, independentFiltering=independentFiltering, alpha=alpha) | |
174 | |
175 # PCA + clustering | |
176 exploreCounts(object=out.DESeq2$dds, group=target[,varInt], typeTrans=typeTrans, col=colors) | |
177 | |
178 # summary of the analysis (boxplots, dispersions, diag size factors, export table, nDiffTotal, histograms, MA plot) | |
179 summaryResults <- summarizeResults.DESeq2(out.DESeq2, group=target[,varInt], col=colors, | |
180 independentFiltering=independentFiltering, | |
181 cooksCutoff=cooksCutoff, alpha=alpha) | |
182 | |
183 # save image of the R session | |
184 save.image(file=paste0(projectName, ".RData")) | |
185 | |
186 # generating HTML report | |
187 writeReport.DESeq2(target=target, counts=counts, out.DESeq2=out.DESeq2, summaryResults=summaryResults, | |
188 majSequences=majSequences, workDir=workDir, projectName=projectName, author=author, | |
189 targetFile=targetFile, rawDir=rawDir, featuresToRemove=featuresToRemove, varInt=varInt, | |
190 condRef=condRef, batch=batch, fitType=fitType, cooksCutoff=cooksCutoff, | |
191 independentFiltering=independentFiltering, alpha=alpha, pAdjustMethod=pAdjustMethod, | |
192 typeTrans=typeTrans, locfunc=locfunc, colors=colors) |