annotate template_script_edgeR_CL.r @ 3:de6d0b7c17af draft

release 1.6.3
author lgueguen
date Mon, 01 Oct 2018 05:07:56 -0400
parents 581d217c7337
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
1 ################################################################################
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
2 ### R script to compare several conditions with the SARTools and edgeR packages
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
3 ### Hugo Varet
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
4 ### May 16th, 2018
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
5 ### designed to be executed with SARTools 1.6.3
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
6 ### run "Rscript template_script_edgeR_CL.r --help" to get some help
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
7 ################################################################################
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
8
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
rm(list=ls())       # remove all the objects from the R session
library(optparse)   # to run the script in command lines

# Command-line options with their associated default values.
# The `dest` of each option is the name under which its value is read back
# from the parsed option list further down in the script. The -F and -C
# options carry comma-separated values in a single string; they are split
# after parsing.
option_list <- list(
  make_option(c("-P", "--projectName"),
              default = basename(getwd()),
              dest = "projectName",
              help = "name of the project used for the report [default: name of the current directory]."),

  make_option(c("-A", "--author"),
              # the "user" field is selected by name instead of by its
              # position (7) in the vector returned by Sys.info()
              default = Sys.info()[["user"]],
              dest = "author",
              help = "name of the report author [default: %default]."),

  make_option(c("-t", "--targetFile"),
              default = "target.txt",
              dest = "targetFile",
              help = "path to the design/target file [default: %default]."),

  make_option(c("-r", "--rawDir"),
              default = "raw",
              dest = "rawDir",
              help = "path to the directory containing the HTSeq files [default: %default]."),

  make_option(c("-F", "--featuresToRemove"),
              default = "alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
              dest = "FTR",
              help = "names of the features to be removed, more than once can be specified [default: %default]"),

  make_option(c("-v", "--varInt"),
              default = "group",
              dest = "varInt",
              help = "factor of interest [default: %default]"),

  make_option(c("-c", "--condRef"),
              default = "WT",
              dest = "condRef",
              help = "reference biological condition [default: %default]"),

  make_option(c("-b", "--batch"),
              default = NULL,
              dest = "batch",
              help = "blocking factor [default: %default] or \"batch\" for example"),

  make_option(c("-a", "--alpha"),
              default = 0.05,
              dest = "alpha",
              help = "threshold of statistical significance [default: %default]"),

  make_option(c("-p", "--pAdjustMethod"),
              default = "BH",
              dest = "pAdjustMethod",
              help = "p-value adjustment method: \"BH\" or \"BY\" [default: %default]"),

  make_option(c("-m", "--cpmCutoff"),
              default = 1,
              dest = "cpmCutoff",
              help = "counts-per-million cut-off to filter low counts"),

  make_option(c("-g", "--gene.selection"),
              default = "pairwise",
              dest = "gene.selection",
              help = "selection of the features in MDSPlot [default: %default]"),

  make_option(c("-n", "--normalizationMethod"),
              default = "TMM",
              dest = "normalizationMethod",
              help = "normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"),

  make_option(c("-C", "--colors"),
              default = "dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
              dest = "cols",
              help = "colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"),

  # flag option: FALSE unless -f / --forceCairoGraph is given
  make_option(c("-f", "--forceCairoGraph"),
              action = "store_true",
              default = FALSE,
              dest = "forceCairoGraph",
              help = "activate cairo type")
)
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
90
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
# Build the parser from the option definitions, then read the command line.
# Only the "options" element of the parse result is kept; the script declares
# zero positional arguments.
parser <- OptionParser(
  usage = "usage: %prog [options]",
  option_list = option_list,
  description = "Compare two or more biological conditions in a RNA-Seq framework with edgeR.",
  epilogue = "For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>"
)
opt <- parse_args(
  parser,
  args = commandArgs(trailingOnly = TRUE),
  positional_arguments = 0
)[["options"]]
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
97
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
# Copy every parsed option into the variable used by the rest of the script.
workDir <- getwd()

# report identification
projectName <- opt[["projectName"]]  # name of the project
author <- opt[["author"]]            # author of the statistical analysis/report

# input data
targetFile <- opt[["targetFile"]]    # path to the design/target file
rawDir <- opt[["rawDir"]]            # path to the directory containing raw counts files
# names of the features to be removed (specific HTSeq-count information and
# rRNA for example), given on the command line as one comma-separated string
featuresToRemove <- strsplit(opt[["FTR"]], split = ",", fixed = TRUE)[[1]]

# experimental design
varInt <- opt[["varInt"]]            # factor of interest
condRef <- opt[["condRef"]]          # reference biological condition
batch <- opt[["batch"]]              # blocking factor: NULL (default) or "batch" for example

# statistical settings
alpha <- as.numeric(opt[["alpha"]])  # threshold of statistical significance
pAdjustMethod <- opt[["pAdjustMethod"]]              # p-value adjustment method: "BH" (default) or "BY"
gene.selection <- opt[["gene.selection"]]            # selection of the features in MDSPlot
normalizationMethod <- opt[["normalizationMethod"]]  # normalization method in calcNormFactors
cpmCutoff <- opt[["cpmCutoff"]]      # counts-per-million cut-off to filter low counts

# graphics
# vector of colors of each biological condition on the plots
colors <- strsplit(opt[["cols"]], split = ",", fixed = TRUE)[[1]]
forceCairoGraph <- opt[["forceCairoGraph"]]  # force cairo as plotting device if enabled

# Debugging aid, disabled: uncomment to print the values read above.
# print(paste("workDir", workDir))
# print(paste("projectName", projectName))
# print(paste("author", author))
# print(paste("targetFile", targetFile))
# print(paste("rawDir", rawDir))
# print(paste("varInt", varInt))
# print(paste("condRef", condRef))
# print(paste("batch", batch))
# print(paste("alpha", alpha))
# print(paste("pAdjustMethod", pAdjustMethod))
# print(paste("featuresToRemove", featuresToRemove))
# print(paste("colors", colors))
# print(paste("gene.selection", gene.selection))
# print(paste("normalizationMethod", normalizationMethod))
# print(paste("cpmCutoff", cpmCutoff))
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
130
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
################################################################################
### running script ###
################################################################################
# setwd(workDir)
library(SARTools)

# switch the bitmap device type to cairo when -f / --forceCairoGraph was given
if (forceCairoGraph) options(bitmapType = "cairo")

# checking parameters
# The returned value is used below as a logical flag: TRUE means that at least
# one parameter was rejected.
problem <- checkParameters.edgeR(projectName = projectName, author = author,
                                 targetFile = targetFile, rawDir = rawDir,
                                 featuresToRemove = featuresToRemove,
                                 varInt = varInt, condRef = condRef,
                                 batch = batch, alpha = alpha,
                                 pAdjustMethod = pAdjustMethod,
                                 cpmCutoff = cpmCutoff,
                                 gene.selection = gene.selection,
                                 normalizationMethod = normalizationMethod,
                                 colors = colors)

# Stop here when the check failed. The workspace is still saved, as before,
# but the process now ends with a non-zero exit status so that a calling shell
# or workflow engine can tell the run did not succeed (quit() defaults to
# status 0).
if (problem) quit(save = "yes", status = 1)
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
145
de6d0b7c17af release 1.6.3
lgueguen
parents: 0
diff changeset
# Read the design/target file.
target <- loadTargetFile(
  targetFile = targetFile,
  varInt = varInt,
  condRef = condRef,
  batch = batch
)

# Read the count files listed in the target.
counts <- loadCountData(
  target = target,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove
)

# Descriptive plots; the returned value is handed to the report at the end.
majSequences <- descriptionPlots(
  counts = counts,
  group = target[, varInt],
  col = colors
)

# edgeR analysis.
out.edgeR <- run.edgeR(
  counts = counts,
  target = target,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  cpmCutoff = cpmCutoff,
  normalizationMethod = normalizationMethod,
  pAdjustMethod = pAdjustMethod
)

# MDS + clustering.
exploreCounts(
  object = out.edgeR$dge,
  group = target[, varInt],
  gene.selection = gene.selection,
  col = colors
)

# Summary of the analysis (boxplots, dispersions, export table, nDiffTotal,
# histograms, MA plot).
summaryResults <- summarizeResults.edgeR(
  out.edgeR,
  group = target[, varInt],
  counts = counts,
  alpha = alpha,
  col = colors
)

# Save an image of the R session, named after the project.
save.image(file = paste0(projectName, ".RData"))

# Generate the HTML report from the results and the parameters of this run.
writeReport.edgeR(
  target = target,
  counts = counts,
  out.edgeR = out.edgeR,
  summaryResults = summaryResults,
  majSequences = majSequences,
  workDir = workDir,
  projectName = projectName,
  author = author,
  targetFile = targetFile,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  alpha = alpha,
  pAdjustMethod = pAdjustMethod,
  cpmCutoff = cpmCutoff,
  colors = colors,
  gene.selection = gene.selection,
  normalizationMethod = normalizationMethod
)