comparison macros.xml @ 0:581d217c7337 draft

Planemo upload
author lgueguen
date Fri, 22 Jul 2016 05:39:13 -0400
parents
children d86ccac2a660
comparison
equal deleted inserted replaced
-1:000000000000 0:581d217c7337
1 <macros>
<!-- Macro "requirements": declares the two packages this wrapper depends on,
     r-sartools 1.2.0 and r-optparse 1.3.0, each pinned to an exact version. -->
2 <xml name="requirements">
3 <requirements>
4 <requirement type="package" version="1.2.0">r-sartools</requirement>
5 <requirement type="package" version="1.3.0">r-optparse</requirement>
6 </requirements>
7 </xml>
8
<!-- Macro "stdio": job failure detection rules.
     * Any non-zero exit code is fatal. The range "1:" is open-ended; the former
       value "1" matched exit code 1 only, so a failure with any other exit code
       was not caught by this rule.
     * The text "Execution halted" or "rsync error" on either stdout or stderr
       (source="both") is also fatal. -->
9 <xml name="stdio">
10 <stdio>
11 <exit_code range="1:" level="fatal" />
12 <regex match="Execution halted"
13 source="both"
14 level="fatal"
15 description="Execution halted" />
16 <regex match="rsync error"
17 source="both"
18 level="fatal"
19 description="rsync error" />
20 </stdio>
21 </xml>
22
<!-- Token @COMMAND_BASIC_PARAMETERS@: command-line fragment forwarding the basic
     parameters (project name, author, target file, raw counts archive, features
     to remove, factor of interest, reference condition).
     Every substituted value is wrapped in single quotes so that a value
     containing whitespace or shell metacharacters reaches the script as one
     argument instead of being split or interpreted by the shell. -->
23 <token name="@COMMAND_BASIC_PARAMETERS@">
24 --projectName '$projectName'
25 --author '$author'
26 --targetFile '$targetFile'
27 --rawDir '$rawDir'
28 --featuresToRemove '$featuresToRemove'
29 --varInt '$varInt'
30 --condRef '$condRef'
31 </token>
32
<!-- Token @COMMAND_BATCH_PARAM@: emits the batch option.
     When the "condition" checkbox of advanced_parameters.batch_condition is set,
     the user-supplied blocking factor is passed; otherwise the literal NULL is
     passed. The user-supplied value is single-quoted because it is free text
     (its only validator rejects an empty field) and must stay a single shell
     argument. -->
33 <token name="@COMMAND_BATCH_PARAM@">
34 #if $advanced_parameters.batch_condition.condition:
35 --batch '$advanced_parameters.batch_condition.batch'
36 #else:
37 --batch NULL
38 #end if
39 </token>
40
<!-- Token @COMMAND_OUTPUTS@: command-line fragment passing the output datasets.
     For the figures and tables HTML outputs both the dataset itself and its
     files_path are passed; the RData, report and log outputs are passed as
     single datasets. -->
41 <token name="@COMMAND_OUTPUTS@">
42 --figures_html $figures_html
43 --figures_html_files_path $figures_html.files_path
44 --tables_html $tables_html
45 --tables_html_files_path $tables_html.files_path
46 --rdata $rdata
47 --report_html $report_html
48 --log $log
49 </token>
50
<!-- Macro "basic_parameters": the inputs common to the wrapped analyses.
     * projectName, author, featuresToRemove, varInt, condRef: free-text fields
       that must be non-empty and must not contain whitespace.
     * targetFile: the design / target dataset (format txt).
     * rawDir: a zip dataset holding the raw count files.
     The validators use the anchored pattern ^\S+$ so the whole value is checked.
     The previous unanchored \S+ only needed a non-space prefix to match, so a
     value such as "my project" was accepted despite the "No space allowed"
     message. -->
51 <macro name="basic_parameters">
52 <param name="projectName" type="text" value="Project" label="Name of the project used for the report" help="(-P, --projectName) No space allowed." >
53 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
54 </param>
55 <param name="author" type="text" value="Galaxy" label="Name of the report author" help="(-A, --author) No space allowed." >
56 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
57 </param>
58 <param name="targetFile" type="data" format="txt" label="Design / target file" help="(-t, --targetFile) See the help section below for details on the required format." />
59 <param name="rawDir" type="data" format="no_unzip.zip,zip" label="Zip file containing raw counts files" help="(-r, --rawDir) See the help section below for details on the required format." />
60 <param name="featuresToRemove" type="text" size="100" value="alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual" label="Names of the features to be removed" help="(-F, --featuresToRemove) Separate the features with a comma, no space allowed. More than once can be specified. Specific HTSeq-count information and rRNA for example. Default are 'alignment_not_unique,ambiguous,no_feature,not_aligned,too_low_aQual'." >
61 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
62 </param>
63 <param name="varInt" type="text" value="group" label="Factor of interest" help="(-v, --varInt) Biological condition in the target file. Default is 'group'." >
64 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
65 </param>
66 <param name="condRef" type="text" value="WT" label="Reference biological condition" help="(-c, --condRef) Reference biological condition used to compute fold-changes, must be one of the levels of 'Factor of interest'." >
67 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
68 </param>
69 </macro>
70
<!-- Macro "batch_param": optional blocking factor.
     The conditional "batch_condition" is driven by the boolean "condition"
     (unchecked by default; its values are "batch" when checked and "NULL" when
     unchecked). The "NULL" branch defines no further input; the "batch" branch
     defines the text field "batch" (default "batch"), which must not be empty. -->
71 <macro name="batch_param">
72 <conditional name="batch_condition">
73 <param name="condition" type="boolean" checked="false" truevalue="batch" falsevalue="NULL" label="Add a blocking factor" help="(-b, --batch) Adjustment variable to use as a batch effect. Default: unchecked if no batch effect needs to be taken into account."/>
74 <when value="NULL" />
75 <when value="batch">
76 <param name="batch" type="text" value="batch" label="Blocking factor value" help="Must be a column of the target file" >
77 <validator type="empty_field"/>
78 </param>
79 </when>
80 </conditional>
81 </macro>
82
<!-- Macro "alpha_param": significance threshold for adjusted p-values, a float
     constrained to the range 0 to 1 with a default of 0.05. -->
83 <macro name="alpha_param">
84 <param name="alpha" type="float" value="0.05" min="0" max="1" label="Threshold of statistical significance" help="(-a, --alpha) Significance threshold applied to the adjusted p-values to select the differentially expressed features. Default is 0.05. The comma is not allowed as decimal separator, use a point instead." />
85 </macro>
86
<!-- Macro "padjustmethod_param": select list for the multiple-testing p-value
     adjustment method. Seven methods are offered; "BH" is preselected. -->
87 <macro name="padjustmethod_param">
88 <param name="pAdjustMethod" type="select" label="p-value adjustment method" help="(-p, --pAdjustMethod) p-value adjustment method for multiple testing. 'BH' by default, 'BY' or any value of p.adjust.methods." >
89 <option value="BH" selected="true">BH</option>
90 <option value="BY">BY</option>
91 <option value="bonferroni">bonferroni</option>
92 <option value="fdr">fdr</option>
93 <option value="hochberg">hochberg</option>
94 <option value="holm">holm</option>
95 <option value="hommel">hommel</option>
96 </param>
97 </macro>
98
<!-- Macro "colors_param": comma-separated list of plot colors, one per
     biological condition, with eight default colors.
     The validator uses the anchored pattern ^\S+$ so that the whole value must
     be free of whitespace. The previous unanchored \S+ only needed a non-space
     prefix to match, so "red, blue" was accepted despite the "No space allowed"
     message. -->
99 <macro name="colors_param">
100 <param name="colors" type="text" size="100" value="dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange" label="Colors of each biological condition on the plots: 'col1,col2,col3,col4'" help="(-C, --colors) Separate the colors with a comma, no space allowed. Default are 'dodgerblue,firebrick1,MediumVioletRed,SpringGreen,chartreuse,cyan,darkorchid,darkorange'." >
101 <validator type="regex" message="Field requires a value. No space allowed.">^\S+$</validator>
102 </param>
103 </macro>
104
<!-- Macro "outputs": the five output datasets. Report, tables and figures are
     HTML datasets, the R log is a txt dataset and the R objects file uses the
     generic "data" format. Each label starts with the name of the running tool. -->
105 <macro name="outputs">
106 <data name="report_html" format="html" label="${tool.name} report" />
107 <data name="tables_html" format="html" label="${tool.name} tables" />
108 <data name="figures_html" format="html" label="${tool.name} figures" />
109 <data name="log" format="txt" label="${tool.name} R log" />
110 <data name="rdata" format="data" label="${tool.name} R objects (.RData)" />
111 </macro>
112
113 <token name="@HELP_AUTHORS@">
114 .. class:: infomark
115
116 **Authors** M.-A. Dillies and H. Varet
117
118 | If you use this tool, please cite: H. Varet, L. Brillet-Guéguen, J.-Y. Coppee and M.-A. Dillies, SARTools: A DESeq2- and EdgeR-Based R Pipeline for Comprehensive Differential Analysis of RNA-Seq Data, PLoS One, 2016, doi: http://dx.doi.org/10.1371/journal.pone.0157022
119 | For details about this tool, please go to https://github.com/PF2-pasteur-fr/SARTools
120
121 .. class:: infomark
122
123 **Galaxy integration** Loraine Brillet-Guéguen, Institut Français de Bioinformatique
124
125 | Contact support.abims@sb-roscoff.fr for any questions or concerns about the Galaxy implementation of this tool.
126
127 ---------------------------------------------------
128 </token>
129
130 <token name="@HELP_DESCRIPTION@">
131 | SARTools is an R package dedicated to the differential analysis of RNA-seq data. It provides tools to generate descriptive and diagnostic graphs, to run the differential analysis with one of the well known DESeq2 or edgeR packages and to export the results into easily readable tab-delimited files. It also facilitates the generation of an HTML report which displays all the figures produced, explains the statistical methods and gives the results of the differential analysis.
132 | Note that SARTools does not intend to replace DESeq2 or edgeR: it simply provides an environment to go with them. For more details about the methodology behind DESeq2 or edgeR, the user should read their documentations and papers.
133 </token>
134
135 <token name="@HELP_INPUT_FILES@">
136 .. class:: warningmark
137
138 If the counts and the target files are not supplied in the required formats, the workflow will probably crash and will not be able to run the analysis.
139
140
141 +---------------------------+-----------+
142 | Parameter : num + label | Format |
143 +===========================+===========+
144 | 1 : Design / target file | tabular |
145 +---------------------------+-----------+
146 | 2 : Raw counts files | zip |
147 +---------------------------+-----------+
148
149
150 Design/target file:
151 | The user has to supply a tab delimited file which describes the experiment, i.e. which contains the name of the biological condition associated with each sample. This file is called "target" as a reference to the target file needed when using the limma package [1]. This file has one row per sample and is composed of at least three columns with headers:
152
153 * column 1 : unique names of the samples (short but informative as they will be displayed on all the figures);
154 * column 2 : name of the count files;
155 * column 3 : biological conditions;
156 * optional columns : further information about the samples (day of library preparation for example).
157
158
159 - Example of a target file::
160
161 label files group
162 s1c1 count_file_sample1_cond1.txt cond1
163 s2c1 count_file_sample2_cond1.txt cond1
164 s1c2 count_file_sample1_cond2.txt cond2
165 s2c2 count_file_sample2_cond2.txt cond2
166
167
168 Zip file containing raw counts files:
169 | The statistical analysis assumes that reads have already been mapped and that counts per feature (gene or transcript) are available. If counting has been done with HTSeq-count [2, 3], output files are ready to be loaded in R with the dedicated SARTools function. If not, the user must supply, in a zip file, one count file per sample with two tab delimited columns without header:
170
171 * column 1 : the unique IDs of the features;
172 * column 2 : the raw counts associated with these features (null or positive integers).
173 </token>
174
175 <token name="@HELP_BASIC_PARAMETERS@">
176 * **projectName:** name of the project;
177 * **author:** author of the analysis;
178 * **featuresToRemove:** character vector containing the IDs of the features to remove before running the analysis (default are "alignment_not_unique", "ambiguous", "no_feature", "not_aligned", "too_low_aQual" to remove HTSeq-count specific rows);
179 * **varInt:** variable of interest, i.e. biological condition, in the target file ("group" by default);
180 * **condRef:** reference biological condition used to compute fold-changes (no default, must be one of the levels of varInt);
181 </token>
182
183 <token name="@HELP_OUTPUT_FILES@">
184 **Report:**
185
186
187 | Gives details about the methodology, the different steps and the results. It displays all the figures produced and the most important results of the differential analysis, such as the number of up- and down-regulated features.
188 | The user should read the full HTML report and closely analyze each figure to check that the analysis ran smoothly.
189
190
191 **Tables:**
192
193
194 * **TestVsRef.complete.txt:** contains all the features studied;
195 * **TestVsRef.down.txt:** contains only significant down-regulated features, i.e. less expressed in Test than in Ref;
196 * **TestVsRef.up.txt:** contains only significant up-regulated features i.e. more expressed in Test than in Ref.
197
198
199 **Figures:**
200
201
202 * **MAplot.png:** MA-plot for each comparison (log ratio of the means vs intensity);
203 * **PCA.png:** first and second factorial planes of the PCA on the samples based on VST or rlog data;
204 * **barplotNull.png:** percentage of null counts per sample;
205 * **barplotTC.png:** total number of reads per sample;
206 * **cluster.png:** hierarchical clustering of the samples (based on VST or rlog data);
207 * **countsBoxplot.png:** boxplots on raw and normalized counts;
208 * **densplot.png:** estimation of the density of the counts for each sample;
209 * **diagSizeFactorsHist.png:** diagnostic of the estimation of the size factors;
210 * **diagSizeFactorsTC.png:** plot of the size factors vs the total number of reads;
211 * **dispersionsPlot.png:** graph of the estimations of the dispersions and diagnostic of log-linearity of the dispersions;
212 * **majSeq.png:** percentage of reads caught by the feature having the highest count in each sample;
213 * **pairwiseScatter.png:** pairwise scatter plot between each pair of samples and SERE values;
214 * **rawpHist.png:** histogram of the raw p-values for each comparison;
215 * **volcanoPlot.png:** volcano plot for each comparison (− log10 (adjusted P value) vs log ratio of the means).
216
217
218 **R log file:**
219
220
221 | Gives the R console output.
222
223
224 **R objects (.RData file):**
225
226
227 | All the R objects created during the analysis are saved in this file: it may be used to perform downstream analyses.
228 </token>
229
<!-- Macro "common_citations": references shared by the tools, given as two DOI
     citations and three BibTeX entries (Smyth05, Benjamini95, Benjamini01).
     In the Smyth05 entry the editor names are joined with " and ", which is the
     BibTeX separator for name lists; the former comma-separated list is not
     parsed as five distinct editors. -->
230 <macro name="common_citations">
231 <citation type="doi">10.1371/journal.pone.0157022</citation>
232 <citation type="bibtex">@INBOOK{Smyth05,
233 author = {G.-K. Smyth},
234 editor = {R. Gentleman and V. Carey and S. Dudoit and R. Irizarry and W. Huber},
235 chapter = {Limma: linear models for microarray data},
236 title = {Bioinformatics and Computational Biology Solutions Using R and Bioconductor},
237 publisher = {Springer},
238 year = {2005},
239 pages = {397–420}
240 }</citation>
241 <citation type="doi">10.1093/bioinformatics/btu638</citation>
242 <citation type="bibtex">@ARTICLE{Benjamini95,
243 author = {Y. Benjamini and Y. Hochberg},
244 title = {Controlling the false discovery rate: a practical and powerful approach to multiple testing},
245 journal = {Journal of the Royal Statistical Society B},
246 year = {1995},
247 volume = {57},
248 pages = {289–300}
249 }</citation>
250 <citation type="bibtex">@ARTICLE{Benjamini01,
251 author = {Y. Benjamini and D. Yekutieli},
252 title = {The control of the false discovery rate in multiple testing under dependency},
253 journal = {Ann. Statist.},
254 year = {2001},
255 volume = {29},
256 number = {4},
257 pages = {1165–1188}
258 }</citation>
259 </macro>
260
261 </macros>