comparison template_script_edgeR_CL.r @ 0:581d217c7337 draft

Planemo upload
author lgueguen
date Fri, 22 Jul 2016 05:39:13 -0400
parents
children de6d0b7c17af
comparison
equal deleted inserted replaced
-1:000000000000 0:581d217c7337
1 #!/local/gensoft2/exe/R/3.1.2/bin/Rscript
2
3 # to run this script, use one of these commands:
4 # Rscript --no-save --no-restore --verbose template_script_edgeR_CL.r -r raw -v group -c T0 > log.txt 2>&1
5 # Rscript template_script_edgeR_CL.r -r raw -v group -c T0
6
7 # to get help:
8 # Rscript template_script_edgeR_CL.r --help
9
10 ################################################################################
11 ### R script to compare several conditions with the SARTools and edgeR packages
12 ### Hugo Varet
13 ### April 20th, 2015
14 ### designed to be executed with SARTools 1.1.0
15 ################################################################################
16
rm(list = ls())   # start from an empty workspace
library(optparse) # command-line option parsing

# Command-line options understood by this script, each with its default value.
# `dest` is the name under which the parsed value is stored in the options
# list; "%default" inside a help string is substituted by optparse.
option_list <- list(
  make_option(c("-P", "--projectName"),
    default = basename(getwd()),
    dest = "projectName",
    help = "name of the project used for the report [default: name of the current directory]."
  ),

  # Look the user name up by field name rather than by position (formerly
  # Sys.info()[7]): this does not depend on the ordering of the fields and
  # gives a plain, unnamed character string.
  make_option(c("-A", "--author"),
    default = Sys.info()[["user"]],
    dest = "author",
    help = "name of the report author [default: %default]."
  ),

  make_option(c("-t", "--targetFile"),
    default = "target.txt",
    dest = "targetFile",
    help = "path to the design/target file [default: %default]."
  ),

  make_option(c("-r", "--rawDir"),
    default = "raw",
    dest = "rawDir",
    help = "path to the directory containing the HTSeq files [default: %default]."
  ),

  # Comma-separated list; it is split into a vector after parsing.
  make_option(c("-F", "--featuresToRemove"),
    default = "alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual",
    dest = "FTR",
    help = "names of the features to be removed, more than once can be specified [default: %default]"
  ),

  make_option(c("-v", "--varInt"),
    default = "group",
    dest = "varInt",
    help = "factor of interest [default: %default]"
  ),

  make_option(c("-c", "--condRef"),
    default = "WT",
    dest = "condRef",
    help = "reference biological condition [default: %default]"
  ),

  # No blocking factor unless one is given on the command line.
  make_option(c("-b", "--batch"),
    default = NULL,
    dest = "batch",
    help = "blocking factor [default: %default] or \"batch\" for example"
  ),

  make_option(c("-a", "--alpha"),
    default = 0.05,
    dest = "alpha",
    help = "threshold of statistical significance [default: %default]"
  ),

  make_option(c("-p", "--pAdjustMethod"),
    default = "BH",
    dest = "pAdjustMethod",
    help = "p-value adjustment method: \"BH\" or \"BY\" [default: %default]"
  ),

  make_option(c("-m", "--cpmCutoff"),
    default = 1,
    dest = "cpmCutoff",
    help = "counts-per-million cut-off to filter low counts"
  ),

  make_option(c("-g", "--gene.selection"),
    default = "pairwise",
    dest = "gene.selection",
    help = "selection of the features in MDSPlot [default: %default]"
  ),

  make_option(c("-n", "--normalizationMethod"),
    default = "TMM",
    dest = "normalizationMethod",
    help = "normalization method in calcNormFactors: \"TMM\", \"RLE\" or \"upperquartile\" [default: %default]"
  ),

  # Comma-separated list; it is split into a vector after parsing.
  make_option(c("-C", "--colors"),
    default = "dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange",
    dest = "cols",
    help = "colors of each biological condition on the plots\n\t\t\"col1,col2,col3,col4\"\n\t\t[default: %default]"
  )
)
92
# Build the parser from the option specifications above.
parser <- OptionParser(
  usage = "usage: %prog [options]",
  option_list = option_list,
  description = "Compare two or more biological conditions in a RNA-Seq framework with edgeR.",
  epilogue = "For comments, bug reports etc... please contact Hugo Varet <hugo.varet@pasteur.fr>"
)

# Parse the arguments that follow the script name; no positional argument is
# accepted. Only the named options are kept. The temporaries live inside
# local() so that the global workspace gains nothing but `opt`.
opt <- local({
  cli_args <- commandArgs(trailingOnly = TRUE)
  parsed <- parse_args(parser, args = cli_args, positional_arguments = 0)
  parsed$options
})
99
# Unpack the parsed options into the plain variables used by the analysis.

# -- report metadata and input locations
workDir     <- getwd()                # directory the script is run from
projectName <- opt[["projectName"]]   # name of the project
author      <- opt[["author"]]        # author of the statistical analysis/report
targetFile  <- opt[["targetFile"]]    # path to the design/target file
rawDir      <- opt[["rawDir"]]        # path to the directory containing raw counts files

# -- comma-separated options, turned into character vectors
# names of the features to be removed (specific HTSeq-count information and
# rRNA for example)
featuresToRemove <- strsplit(opt[["FTR"]], ",")[[1]]
# colors of each biological condition on the plots
colors <- strsplit(opt[["cols"]], ",")[[1]]

# -- experimental design
varInt  <- opt[["varInt"]]    # factor of interest
condRef <- opt[["condRef"]]   # reference biological condition
batch   <- opt[["batch"]]     # blocking factor: NULL (default) or "batch" for example

# -- statistical settings
alpha               <- as.numeric(opt[["alpha"]])   # threshold of statistical significance
pAdjustMethod       <- opt[["pAdjustMethod"]]       # p-value adjustment method: "BH" (default) or "BY"
gene.selection      <- opt[["gene.selection"]]      # selection of the features in MDSPlot
normalizationMethod <- opt[["normalizationMethod"]] # normalization method in calcNormFactors
cpmCutoff           <- opt[["cpmCutoff"]]           # counts-per-million cut-off to filter low counts
116
117 # print(paste("workDir", workDir))
118 # print(paste("projectName", projectName))
119 # print(paste("author", author))
120 # print(paste("targetFile", targetFile))
121 # print(paste("rawDir", rawDir))
122 # print(paste("varInt", varInt))
123 # print(paste("condRef", condRef))
124 # print(paste("batch", batch))
125 # print(paste("alpha", alpha))
126 # print(paste("pAdjustMethod", pAdjustMethod))
127 # print(paste("featuresToRemove", featuresToRemove))
128 # print(paste("colors", colors))
129 # print(paste("gene.selection", gene.selection))
130 # print(paste("normalizationMethod", normalizationMethod))
131 # print(paste("cpmCutoff", cpmCutoff))
132
133 ################################################################################
134 ### running script ###
135 ################################################################################
136 # setwd(workDir)
137 library(SARTools)
138
# Validate every user-supplied parameter before any data is loaded.
# The returned flag is treated as TRUE when at least one parameter is unusable.
problem <- checkParameters.edgeR(
  projectName = projectName, author = author, targetFile = targetFile,
  rawDir = rawDir, featuresToRemove = featuresToRemove, varInt = varInt,
  condRef = condRef, batch = batch, alpha = alpha, pAdjustMethod = pAdjustMethod,
  cpmCutoff = cpmCutoff, gene.selection = gene.selection,
  normalizationMethod = normalizationMethod, colors = colors
)

# Stop here on invalid parameters. The workspace is still saved (save = "yes"),
# but the process now exits with a non-zero status so that a calling shell,
# pipeline or workflow manager sees the run as failed rather than successful.
if (problem) {
  quit(save = "yes", status = 1)
}
146
# Step 1: read the design/target file.
target <- loadTargetFile(
  targetFile = targetFile,
  varInt = varInt,
  condRef = condRef,
  batch = batch
)

# Step 2: read the count files listed in the target, dropping unwanted features.
counts <- loadCountData(
  target = target,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove
)

# Step 3: descriptive plots of the raw counts.
majSequences <- descriptionPlots(
  counts = counts,
  group = target[, varInt],
  col = colors
)

# Step 4: edgeR differential analysis.
out.edgeR <- run.edgeR(
  counts = counts,
  target = target,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  cpmCutoff = cpmCutoff,
  normalizationMethod = normalizationMethod,
  pAdjustMethod = pAdjustMethod
)

# Step 5: exploratory analysis (MDS + clustering).
exploreCounts(
  object = out.edgeR$dge,
  group = target[, varInt],
  gene.selection = gene.selection,
  col = colors
)

# Step 6: summary of the analysis (boxplots, dispersions, export table,
# nDiffTotal, histograms, MA plot).
summaryResults <- summarizeResults.edgeR(
  out.edgeR,
  group = target[, varInt],
  counts = counts,
  alpha = alpha,
  col = colors
)

# Step 7: store the whole R session in "<projectName>.RData".
save.image(file = paste0(projectName, ".RData"))

# Step 8: produce the HTML report from the results and the run parameters.
writeReport.edgeR(
  target = target,
  counts = counts,
  out.edgeR = out.edgeR,
  summaryResults = summaryResults,
  majSequences = majSequences,
  workDir = workDir,
  projectName = projectName,
  author = author,
  targetFile = targetFile,
  rawDir = rawDir,
  featuresToRemove = featuresToRemove,
  varInt = varInt,
  condRef = condRef,
  batch = batch,
  alpha = alpha,
  pAdjustMethod = pAdjustMethod,
  colors = colors,
  gene.selection = gene.selection,
  normalizationMethod = normalizationMethod
)