comparison qualimap_rnaseq.xml @ 0:613e6446ea5d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qualimap commit b4d43001cc0caa14d760c347fa1c416929f769b2"
author iuc
date Thu, 10 Oct 2019 17:41:10 -0400
parents
children ce0da6c9f49e
comparison
equal deleted inserted replaced
-1:000000000000 0:613e6446ea5d
1 <tool id="qualimap_rnaseq" name="QualiMap RNA-Seq QC" version="@VERSION@">
2 <macros>
3 <import>qualimap_macros.xml</import>
4 </macros>
5 <expand macro="requirements" />
6 <expand macro="version_command" />
<command detect_errors="exit_code"><![CDATA[
    ## Command template: runs "qualimap rnaseq" on the selected BAM/GTF pair and,
    ## if requested, builds the per-gene counts dataset with a header line.
    ## The first and the last line are tokens that are not defined in this
    ## file (presumably provided by the imported macros file).
    ##
    ## The three bare placeholders after the -gtf option expand either to an
    ## empty string or to a complete command line flag, depending on the
    ## selected option (or hidden value) of the corresponding parameter.
    @SET_JAVA_OPTS@ &&

    qualimap rnaseq
    -bam '${seq_info.input}'
    -gtf '$features'
    ${seq_info.treat_as_pe}
    ${seq_info.sorted}
    ${counts_out.report_counts}
    --sequencing-protocol ${read_filtering.library_type}
    --algorithm ${read_filtering.treat_multimappers}
    -outdir results -outformat html &&

    ## Not referenced again in this file; presumably read by the closing token.
    #set $report_name = 'qualimapReport'
    #set $summary_report = 'rnaseq_qc_results.txt'
    #if str($counts_out.report_counts):
        ## Header of the counts column: the user-supplied name or, if that is
        ## blank, the input dataset name with spaces turned into underscores.
        ## The value ends up inside a single-quoted shell argument, so any
        ## single quote it contains is replaced as well; otherwise a dataset
        ## called e.g. "it's" would terminate the quoting and break (or
        ## inject into) the command line.
        #set $ccol_name = (str($counts_out.ccol_name).strip() or str($seq_info.input.name).replace(' ', '_')).replace("'", '_')
        printf '#GeneID\t%s\n' '$ccol_name' > '$output_counts' &&
        cat results/counts.txt >> '$output_counts' &&
    #end if
    @MASSAGE_OUTPUT@
]]></command>
<inputs>
    <!-- Counting mode. Each branch declares its own BAM input and a hidden
         "sorted" value; both the selected option value and the hidden value
         are inserted verbatim into the command line. -->
    <conditional name="seq_info">
        <param name="treat_as_pe" type="select" argument="-pe"
               label="Counting mode"
               help="You will usually want to choose 'Count fragments' for paired-end data. For single-end data, choose 'Count reads'. See tool help below.">
            <option value="">Count reads</option>
            <option value="--paired">Count fragments</option>
        </param>
        <!-- Counting reads: any BAM dataset, empty hidden value. -->
        <when value="">
            <param name="input" type="data" format="bam" argument="-bam"
                   label="Mapped reads input dataset" />
            <param name="sorted" type="hidden" value="" />
        </when>
        <!-- Counting fragments: input restricted to the qname_sorted.bam
             datatype, and the hidden value carries the sorted flag. -->
        <when value="--paired">
            <param name="input" type="data" format="qname_sorted.bam" argument="-bam"
                   label="Mapped reads input dataset" />
            <param name="sorted" type="hidden" value="--sorted" />
        </when>
    </conditional>

    <param name="features" type="data" format="gtf" argument="-gtf"
           label="Genome annotation data" />

    <!-- Optional counts output. A non-empty option value is both the flag
         passed to the tool and the switch that enables the counts dataset. -->
    <conditional name="counts_out">
        <param name="report_counts" type="select" argument="-oc"
               label="Keep the per-gene counts data?"
               help="The resulting dataset can, for example, serve as input to QualiMap Counts QC for further assessment.">
            <option value="">No, just report statistics</option>
            <option value="-oc counts.txt">Yes, generate separate counts output</option>
        </param>
        <when value="" />
        <when value="-oc counts.txt">
            <param name="ccol_name" type="text"
                   label="Name to use for the counts column"
                   help="Consider using the name of the analyzed sample here. Default: Name of the mapped reads input dataset in the history" />
        </when>
    </conditional>

    <!-- Option values of both selects are passed unchanged as the arguments
         of the sequencing-protocol and algorithm options. -->
    <section name="read_filtering" title="Read selection for counting" expanded="true">
        <param name="library_type" type="select" display="radio" argument="-p"
               label="Strandedness">
            <option value="non-strand-specific">Count reads/fragments independent of strandedness</option>
            <option value="strand-specific-forward">Count only reads/fragments expected in forward-stranded data</option>
            <option value="strand-specific-reverse">Count only reads/fragments expected in reverse-stranded data</option>
        </param>
        <param name="treat_multimappers" type="select" display="radio" argument="-a"
               label="Multimapping reads">
            <option value="uniquely-mapped-reads">Count uniquely mapped reads only</option>
            <option value="proportional">Count also multimapping reads</option>
        </param>
    </section>
</inputs>
<outputs>
    <!-- HTML report; always produced. -->
    <data name="output_html"
          format="html"
          label="${tool.name} report on ${on_string}" />

    <!-- Counts table; only kept when the report_counts select has a
         non-empty value. -->
    <data name="output_counts"
          format="tsv"
          label="${tool.name} counts on ${on_string}">
        <filter>str(counts_out['report_counts'])</filter>
    </data>

    <!-- Plain-text summary plus the three coverage profile tables, all
         collected from the "results" working directory. -->
    <collection name="raw_data"
                type="list"
                label="Raw data for ${tool.name} on ${on_string}">
        <data name="rnaseq_qc_results"
              format="txt"
              from_work_dir="results/summary_report.txt" />
        <data name="coverage_profile_along_genes_high"
              format="tsv"
              from_work_dir="results/coverage_profile_along_genes_high.txt" />
        <data name="coverage_profile_along_genes_low"
              format="tsv"
              from_work_dir="results/coverage_profile_along_genes_low.txt" />
        <data name="coverage_profile_along_genes_total"
              format="tsv"
              from_work_dir="results/coverage_profile_along_genes_total.txt" />
    </collection>
</outputs>
<tests>
    <!-- Test 1: read counting with all remaining parameters left at their
         defaults; no counts dataset is requested. -->
    <test expect_num_outputs="6">
        <conditional name="seq_info">
            <param name="treat_as_pe" value="" />
            <param name="input" value="test_mapped_reads.bam" />
        </conditional>
        <param name="features" value="features.gtf" />
        <output name="output_html" ftype="html">
            <assert_contents>
                <has_text text="Qualimap report: RNA Seq QC" />
            </assert_contents>
        </output>
        <output_collection name="raw_data" type="list">
            <element name="rnaseq_qc_results"
                     file="rnaseq_qc_results_default.txt"
                     ftype="txt"
                     compare="diff"
                     lines_diff="4" />
        </output_collection>
    </test>

    <!-- Test 2: fragment counting, forward-stranded, proportional counting of
         multimappers, with a counts dataset using a custom column name. -->
    <test expect_num_outputs="7">
        <conditional name="seq_info">
            <param name="treat_as_pe" value="--paired" />
            <param name="input" value="test_mapped_reads.bam" />
        </conditional>
        <param name="features" value="features.gtf" />
        <conditional name="counts_out">
            <param name="report_counts" value="-oc counts.txt" />
            <param name="ccol_name" value="try_this" />
        </conditional>
        <section name="read_filtering">
            <param name="library_type" value="strand-specific-forward" />
            <param name="treat_multimappers" value="proportional" />
        </section>
        <output name="output_html" ftype="html">
            <assert_contents>
                <has_text text="Qualimap report: RNA Seq QC" />
            </assert_contents>
        </output>
        <output name="output_counts" ftype="tsv" file="rnaseq_qc_counts_custom.txt" />
        <output_collection name="raw_data" type="list">
            <element name="rnaseq_qc_results"
                     file="rnaseq_qc_results_custom.txt"
                     ftype="txt"
                     compare="diff"
                     lines_diff="4" />
        </output_collection>
    </test>
</tests>
134 <help><![CDATA[
135 **What it does**
136
137 **QualiMap RNA-Seq QC** reports quality control metrics and bias estimations
138 which are specific for whole transcriptome sequencing, including reads genomic
139 origin, junction analysis, transcript coverage and 5’-3’ bias computation.
140 As such, the tool complements the more general analysis with QualiMap BamQC,
141 and its (optional) gene counts output can be analyzed further with QualiMap
142 Counts QC.
143
144
145 Input
146 =====
147
148 *Mapped reads input dataset*
149
150 The dataset holding the mapped reads to carry out the analysis with. Typically,
151 this will have been produced by a splicing-aware aligner like *HISAT2* or *RNA
152 STAR*.
153
154 *Genome annotation data*
155
156 A GTF dataset of genomic features that mapped reads should be counted for.
157
158
159 Parameters
160 ----------
161
162 *Counting mode*
163
164 Determines whether reads should be counted individually, or whether multiple
165 reads originating from the same sequencing template (*i.e.*, the read and its
166 mate in paired-end sequencing) should be counted as one.
167
168 You will usually want to choose ``Count fragments`` for paired-end data. For
169 single-end data, choose ``Count reads``.
170
171 *Keep the per-gene counts data?*
172
173 Controls whether the optional Counts output dataset should be produced, or not.
174
175 If you choose to produce this dataset, you can use:
176
177 *Name to use for the counts column* to specify the name of the second column in
178 that output.
179
180 Using, for example, the name of the analyzed sample here can help you keep
181 track of your data, especially when joining several counts datasets into a
182 count matrix later on. In addition, *QualiMap Counts QC* will reuse the
183 names of counts columns as sample names.
184
185 **Read selection for counting** section
186
187 *Strandedness*
188
189 Choose here the option that fits the strand-specificity of your sequencing
190 library.
191
192 The Galaxy Training Material has an excellent discussion of sequencing
193 data strandedness included in the
194 `Reference-based RNA-Seq data analysis <https://galaxyproject.github.io/training-material/topics/transcriptomics/tutorials/ref-based/tutorial.html#count-the-number-of-reads-per-annotated-gene>`__
195 tutorial.
196
197 *Multimapping reads*
198
199 Choose here how to treat reads that are mapped ambiguously to several genome locations.
200
201 - *Count uniquely mapped reads only* excludes multi-mapping reads
202
203 - *Count also multimapping reads* activates *proportional* counting of
204 multi-mapping reads.
205
206 In this mode, each read is weighted according to the number of mapped
207 locations. For example, a read mapped to 4 different locations will add 0.25
208 to the "counts" of each of the locations it maps to. The final calculated
209 counts per feature will be converted to integer numbers.
210
211 Note: Detection of multi-mapping reads by the tool relies on the ``NH`` tag of
212 reads in the BAM input, so make sure the aligner used to produce the dataset is
213 configured to write this tag.
214
215
216 Outputs
217 =======
218
219 HTML Report
220 -----------
221
222 **Summary Section**
223
224 *Reads alignment*
225
226 Summarizes the mapping characteristics of the reads in the input:
227
228 - total number of mapped reads
229
230 reported as left/right read mates in case of paired-end reads; excludes
231 secondary alignments
232
233 If you accidentally selected `Count fragments` as the *Counting mode* for
234 single-end data these and the following count of *Number of aligned pairs*
235 will be zero.
236
237 - total number of alignments
238
239 reports all alignment records found, including secondary alignments
240
241 - number of secondary alignments
242
243 - number of non-unique alignments
244
245 reports the number of alignment records with an ``NH`` tag greater than one;
246 corresponds to the number of alignments that will have been skipped during
247 counting when *Count uniquely mapped reads only* is selected
248
249 - number of reads aligned to genes
250
251 - number of ambiguous alignments
252
253 This is the number of mapped reads that span multiple annotated genes.
254 Such reads are always skipped during counting.
255
256 - no feature assigned
257
258 reports the number of alignments that are not overlapping any annotated
259 feature; these may represent alignments to introns or intergenic regions, or,
260 if the number is really high, may indicate a problem with your genome
261 annotations
262
263 - not aligned
264
265 number of reads not mapped by the aligner (but included in the BAM input)
266
267 - strand specificity estimation (fwd/rev)
268
269 computed if *Count reads/fragments independent of strandedness* is selected;
270 estimate of the proportion of alignments in line with forward- and reverse-
271 strand-specificity of the sequencing library
272
273 Balanced proportions (*i.e.* ~ 0.5 forward- and ~ 0.5 reverse-strand support)
274 can be interpreted as likely non-strand-specificity of the sequencing library,
275 while a strand-specific library would manifest itself in a large fraction of
276 reads supporting that specific strand-specificity.
277
278 *Reads genomic origin*
279
280 Lists how many alignments (absolute number/fraction) fall into
281
282 - exonic,
283 - intronic,
284 - intergenic
285
286 regions, or are at least
287
288 - overlapping an exon.
289
290 *Transcript coverage profile*
291
292 The profile provides ratios between mean coverage of 5’ regions, 3’ regions and whole transcripts.
293
294 - 5’ bias
295
296 the ratio of coverage median of 5’ regions (defined as the first 100 nts) to whole transcripts
297
298 - 3’ bias
299
300 the ratio of coverage median of 3’ regions (defined as the last 100 nts) to whole transcripts
301
302 - 5’-3’ bias
303
304 the ratio of 5' bias to 3' bias.
305
306 *Junction analysis*
307
308 Lists the total number of reads with splice junctions and the relative
309 frequency of the (up to) 10 most frequent junction sequences.
310
311
312 **Plots**
313
314 *Reads Genomic Origin*
315
316 A pie chart showing how many read alignments fall into exonic, intronic and
317 intergenic regions.
318
319 *Coverage Profile Along Genes (Total)*
320
321 This plot shows the mean coverage profile of all genes with non-zero
322 overall coverage.
323
324 *Coverage Profile Along Genes (Low)*
325
326 The plot shows the mean coverage profile of the 500 genes with the lowest, but non-zero, overall coverage.
327
328 *Coverage Profile Along Genes (High)*
329
330 The plot shows the mean coverage profile of the 500 genes with the highest
331 overall coverage.
332
333 *Coverage Histogram (0-50x)*
334
335 Coverage of genes from 0 to 50x. Genes with >50x coverage are added to the 50x
336 bin.
337
338 *Junction Analysis*
339
340 This pie chart shows an analysis of the splice junctions observed in the
341 alignments. It consists of three categories:
342
343 - Known
344
345 observed splice junctions both sides of which are in line with the genome
346 annotation data
347
348 - Partly known
349
350 observed splice junctions for which only one junction side can be deduced
351 from the genome annotation data
352
353 - Novel
354
355 observed splice junctions not predicted on either side by the genome
356 annotation data
357
358
359 Raw data
360 --------
361
362 This is a *Collection* of 4 individual datasets.
363
364 Of these, the *rnaseq_qc_results* dataset provides a plain-text version of the
365 *HTML report* *Summary* section.
366
367 The other 3 datasets hold the tabular raw data underlying the three coverage
368 profile plots in the *HTML Report*.
369
370
371 Counts data
372 -----------
373
374 Optional. This is a 2-column tabular dataset of read or fragment counts
375 (depending on the chosen *Counting mode*) per annotated gene. The first column
376 lists the gene identifiers found in the *Genome annotation data*, the second
377 the associated counts.
378
379 This dataset represents valid (single-sample) input for the QualiMap Counts QC
380 tool.
381 ]]></help>
382 <expand macro="citations"/>
383 </tool>