Mercurial > repos > iuc > qualimap_rnaseq
comparison qualimap_rnaseq.xml @ 0:613e6446ea5d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qualimap commit b4d43001cc0caa14d760c347fa1c416929f769b2"
author | iuc |
---|---|
date | Thu, 10 Oct 2019 17:41:10 -0400 |
parents | |
children | ce0da6c9f49e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:613e6446ea5d |
---|---|
1 <tool id="qualimap_rnaseq" name="QualiMap RNA-Seq QC" version="@VERSION@"> | |
2 <macros> | |
3 <import>qualimap_macros.xml</import> | |
4 </macros> | |
5 <expand macro="requirements" /> | |
6 <expand macro="version_command" /> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 @SET_JAVA_OPTS@ && | |
9 ## Run the QC; the seq_info and counts_out values below are the selected option strings themselves and may be empty. | |
10 qualimap rnaseq | |
11 -bam '${seq_info.input}' | |
12 -gtf '$features' | |
13 ${seq_info.treat_as_pe} | |
14 ${seq_info.sorted} | |
15 ${counts_out.report_counts} | |
16 --sequencing-protocol ${read_filtering.library_type} | |
17 --algorithm ${read_filtering.treat_multimappers} | |
18 -outdir results -outformat html && | |
19 ## report_name and summary_report are presumably read by the output-massaging macro token at the end (defined in qualimap_macros.xml); the counts column label gets single quotes replaced so that a dataset name cannot terminate the single-quoted printf argument. | |
20 #set $report_name = 'qualimapReport' | |
21 #set $summary_report = 'rnaseq_qc_results.txt' | |
22 #if str($counts_out.report_counts): | |
23 #set $ccol_name = (str($counts_out.ccol_name).strip() or str($seq_info.input.name).replace(' ', '_')).replace("'", '_') | |
24 printf '#GeneID\t%s\n' '$ccol_name' > '$output_counts' && | |
25 cat results/counts.txt >> '$output_counts' && | |
26 #end if | |
27 @MASSAGE_OUTPUT@ | |
28 ]]></command> | |
29 <inputs> <!-- Tool form; several option values below are inserted verbatim into the command line --> | |
30 <conditional name="seq_info"> <!-- Counting mode selector; each branch declares its own BAM input and a hidden "sorted" value --> | |
31 <param argument="-pe" name="treat_as_pe" type="select" | |
32 label="Counting mode" | |
33 help="You will usually want to choose 'Count fragments' for paired-end data. For single-end data, choose 'Count reads'. See tool help below."> | |
34 <option value="">Count reads</option> | |
35 <option value="--paired">Count fragments</option> | |
36 </param> | |
37 <when value=""> <!-- Count reads: accepts format bam; the hidden sorted value stays empty --> | |
38 <param argument="-bam" name="input" type="data" format="bam" | |
39 label="Mapped reads input dataset" /> | |
40 <param name="sorted" type="hidden" value="" /> | |
41 </when> | |
42 <when value="--paired"> <!-- Count fragments: restricts the input to format qname_sorted.bam and sets the hidden sorted value --> | |
43 <param argument="-bam" name="input" type="data" format="qname_sorted.bam" | |
44 label="Mapped reads input dataset" /> | |
45 <param name="sorted" type="hidden" value="--sorted" /> | |
46 </when> | |
47 </conditional> | |
48 <param argument="-gtf" name="features" type="data" format="gtf" | |
49 label="Genome annotation data" /> | |
50 <conditional name="counts_out"> <!-- The selected value doubles as the command-line fragment; the empty value means no counts output --> | |
51 <param argument="-oc" name="report_counts" type="select" | |
52 label="Keep the per-gene counts data?" | |
53 help="The resulting dataset can, for example, serve as input to QualiMap Counts QC for further assessment."> | |
54 <option value="">No, just report statistics</option> | |
55 <option value="-oc counts.txt">Yes, generate separate counts output</option> | |
56 </param> | |
57 <when value="" /> <!-- no further parameters when counts are not kept --> | |
58 <when value="-oc counts.txt"> | |
59 <param name="ccol_name" type="text" | |
60 label="Name to use for the counts column" | |
61 help="Consider using the name of the analyzed sample here. Default: Name of the mapped reads input dataset in the history" /> | |
62 </when> | |
63 </conditional> | |
64 <section name="read_filtering" title="Read selection for counting" expanded="true"> <!-- values feed the sequencing-protocol and algorithm options of the command --> | |
65 <param argument="-p" name="library_type" type="select" display="radio" | |
66 label="Strandedness"> | |
67 <option value="non-strand-specific">Count reads/fragments independent of strandedness</option> | |
68 <option value="strand-specific-forward">Count only reads/fragments expected in forward-stranded data</option> | |
69 <option value="strand-specific-reverse">Count only reads/fragments expected in reverse-stranded data</option> | |
70 </param> | |
71 <param argument="-a" name="treat_multimappers" type="select" display="radio" | |
72 label="Multimapping reads"> | |
73 <option value="uniquely-mapped-reads">Count uniquely mapped reads only</option> | |
74 <option value="proportional">Count also multimapping reads</option> | |
75 </param> | |
76 </section> | |
77 </inputs> | |
78 <outputs> <!-- HTML report, optional counts table, and a list collection of raw data files --> | |
79 <data name="output_html" format="html" | |
80 label="${tool.name} report on ${on_string}" /> | |
81 <data name="output_counts" format="tsv" | |
82 label="${tool.name} counts on ${on_string}"> <!-- only produced when the report_counts value is a non-empty string (see filter) --> | |
83 <filter>str(counts_out['report_counts'])</filter> | |
84 </data> | |
85 <collection name="raw_data" type="list" | |
86 label="Raw data for ${tool.name} on ${on_string}"> <!-- four fixed elements picked up from the results directory --> | |
87 <data name="rnaseq_qc_results" format="txt" from_work_dir="results/summary_report.txt" /> <!-- NOTE(review): the command sets summary_report to rnaseq_qc_results.txt while this reads summary_report.txt; presumably the output-massaging macro renames the file, confirm in qualimap_macros.xml --> | |
88 <data name="coverage_profile_along_genes_high" format="tsv" from_work_dir="results/coverage_profile_along_genes_high.txt" /> | |
89 <data name="coverage_profile_along_genes_low" format="tsv" from_work_dir="results/coverage_profile_along_genes_low.txt" /> | |
90 <data name="coverage_profile_along_genes_total" format="tsv" from_work_dir="results/coverage_profile_along_genes_total.txt" /> | |
91 </collection> | |
92 </outputs> | |
93 <tests> | |
94 <test expect_num_outputs="6"> <!-- Scenario 1: read counting with all other parameters left at their defaults; no counts output --> | |
95 <conditional name="seq_info"> | |
96 <param name="treat_as_pe" value="" /> | |
97 <param name="input" value="test_mapped_reads.bam" /> | |
98 </conditional> | |
99 <param name="features" value="features.gtf" /> | |
100 <output name="output_html" ftype="html"> | |
101 <assert_contents> | |
102 <has_text text="Qualimap report: RNA Seq QC" /> | |
103 </assert_contents> | |
104 </output> | |
105 <output_collection name="raw_data" type="list"> | |
106 <element name="rnaseq_qc_results" file="rnaseq_qc_results_default.txt" ftype="txt" compare="diff" lines_diff="4" /> | |
107 </output_collection> | |
108 </test> | |
109 <test expect_num_outputs="7"> <!-- Scenario 2: fragment counting, counts output with a custom column name, forward-stranded, proportional multimapper counting --> | |
110 <conditional name="seq_info"> | |
111 <param name="treat_as_pe" value="--paired" /> | |
112 <param name="input" value="test_mapped_reads.bam" /> | |
113 </conditional> | |
114 <param name="features" value="features.gtf" /> | |
115 <conditional name="counts_out"> | |
116 <param name="report_counts" value="-oc counts.txt" /> | |
117 <param name="ccol_name" value="try_this" /> | |
118 </conditional> | |
119 <section name="read_filtering"> | |
120 <param name="library_type" value="strand-specific-forward" /> | |
121 <param name="treat_multimappers" value="proportional" /> | |
122 </section> | |
123 <output name="output_html" ftype="html"> | |
124 <assert_contents> | |
125 <has_text text="Qualimap report: RNA Seq QC" /> | |
126 </assert_contents> | |
127 </output> | |
128 <output name="output_counts" file="rnaseq_qc_counts_custom.txt" ftype="tsv" /> <!-- the extra expected output compared with scenario 1 --> | |
129 <output_collection name="raw_data" type="list"> | |
130 <element name="rnaseq_qc_results" file="rnaseq_qc_results_custom.txt" ftype="txt" compare="diff" lines_diff="4" /> | |
131 </output_collection> | |
132 </test> | |
133 </tests> | |
134 <help><![CDATA[ | |
135 **What it does** | |
136 | |
137 **QualiMap RNA-Seq QC** reports quality control metrics and bias estimations | |
138 which are specific for whole transcriptome sequencing, including reads genomic | |
139 origin, junction analysis, transcript coverage and 5’-3’ bias computation. | |
140 As such, the tool complements the more general analysis with QualiMap BamQC, | |
141 and its (optional) gene counts output can be analyzed further with QualiMap | |
142 Counts QC. | |
143 | |
144 | |
145 Input | |
146 ===== | |
147 | |
148 *Mapped reads input dataset* | |
149 | |
150 The dataset holding the mapped reads to carry out the analysis with. Typically, | |
151 this will have been produced by a splicing-aware aligner like *HISAT2* or *RNA | |
152 STAR*. | |
153 | |
154 *Genome annotation data* | |
155 | |
156 A GTF dataset of genomic features that mapped reads should be counted for. | |
157 | |
158 | |
159 Parameters | |
160 ---------- | |
161 | |
162 *Counting mode* | |
163 | |
164 Determines whether reads should be counted individually, or whether multiple | |
165 reads originating from the same sequencing template (*i.e.*, the read and its | |
166 mate in paired-end sequencing) should be counted as one. | |
167 | |
168 You will usually want to choose ``Count fragments`` for paired-end data. For | |
169 single-end data, choose ``Count reads``. | |
170 | |
171 *Keep the per-gene counts data?* | |
172 | |
173 Controls whether the optional Counts output dataset should be produced, or not. | |
174 | |
175 If you choose to produce this dataset, you can use: | |
176 | |
177 *Name to use for the counts column* to specify the name of the second column in | |
178 that output. | |
179 | |
180 Using, for example, the name of the analyzed sample here can help you keep | |
181 track of your data, especially when joining several counts datasets into a | |
182 count matrix later on. In addition, *QualiMap Counts QC* will reuse the | |
183 names of counts columns as sample names. | |
184 | |
185 **Read selection for counting** section | |
186 | |
187 *Strandedness* | |
188 | |
189 Choose here the option that fits the strand-specificity of your sequencing | |
190 library. | |
191 | |
192 The Galaxy Training Material has an excellent discussion of sequencing | |
193 data strandedness included in the | |
194 `Reference-based RNA-Seq data analysis <https://galaxyproject.github.io/training-material/topics/transcriptomics/tutorials/ref-based/tutorial.html#count-the-number-of-reads-per-annotated-gene>`__ | |
195 tutorial. | |
196 | |
197 *Multimapping reads* | |
198 | |
199 Choose here how to treat reads that are mapped ambiguously to several genome locations. | |
200 | |
201 - *Count uniquely mapped reads only* excludes multi-mapping reads | |
202 | |
203 - *Count also multimapping reads* activates *proportional* counting of | |
204 multi-mapping reads. | |
205 | |
206 In this mode, each read is weighted according to the number of mapped | |
207 locations. For example, a read mapped to 4 different locations will add 0.25 | |
208 to the "counts" of each of the locations it maps to. The final calculated | |
209 counts per feature will be converted to integer numbers. | |
210 | |
211 Note: Detection of multi-mapping reads by the tool relies on the ``NH`` tag of | |
212 reads in the BAM input, so make sure the aligner used to produce the dataset is | |
213 configured to write this tag. | |
214 | |
215 | |
216 Outputs | |
217 ======= | |
218 | |
219 HTML Report | |
220 ----------- | |
221 | |
222 **Summary Section** | |
223 | |
224 *Reads alignment* | |
225 | |
226 Summarizes the mapping characteristics of the reads in the input: | |
227 | |
228 - total number of mapped reads | |
229 | |
230 reported as left/right read mates in case of paired-end reads; excludes | |
231 secondary alignments | |
232 | |
233 If you accidentally selected ``Count fragments`` as the *Counting mode* for | |
234 single-end data these and the following count of *Number of aligned pairs* | |
235 will be zero. | |
236 | |
237 - total number of alignments | |
238 | |
239 reports all alignment records found, including secondary alignments | |
240 | |
241 - number of secondary alignments | |
242 | |
243 - number of non-unique alignments | |
244 | |
245 reports the number of alignment records with an ``NH`` tag greater than one; | |
246 corresponds to the number of alignments that will have been skipped during | |
247 counting when *Count uniquely mapped reads only* is selected | |
248 | |
249 - number of reads aligned to genes | |
250 | |
251 - number of ambiguous alignments | |
252 | |
253 This is the number of mapped reads that span multiple annotated genes. | |
254 Such reads are always skipped during counting. | |
255 | |
256 - no feature assigned | |
257 | |
258 reports the number of alignments that are not overlapping any annotated | |
259 feature; these may represent alignments to introns or intergenic regions, or, | |
260 if the number is really high, may indicate a problem with your genome | |
261 annotations | |
262 | |
263 - not aligned | |
264 | |
265 number of reads not mapped by the aligner (but included in the BAM input) | |
266 | |
267 - strand specificity estimation (fwd/rev) | |
268 | |
269 computed if *Count reads/fragments independent of strandedness* is selected; | |
270 estimate of the proportion of alignments in line with forward- and reverse- | |
271 strand-specificity of the sequencing library | |
272 | |
273 Balanced proportions (*i.e.* ~ 0.5 forward- and ~ 0.5 reverse-strand support) | |
274 can be interpreted as likely non-strand-specificity of the sequencing library, | |
275 while a strand-specific library would manifest itself in a large fraction of | |
276 reads supporting that specific strand-specificity. | |
277 | |
278 *Reads genomic origin* | |
279 | |
280 Lists how many alignments (absolute number/fraction) fall into | |
281 | |
282 - exonic, | |
283 - intronic, | |
284 - intergenic | |
285 | |
286 regions, or are at least | |
287 | |
288 - overlapping an exon. | |
289 | |
290 *Transcript coverage profile* | |
291 | |
292 The profile provides ratios between mean coverage of 5’ regions, 3’ regions and whole transcripts. | |
293 | |
294 - 5’ bias | |
295 | |
296 the ratio of coverage median of 5’ regions (defined as the first 100 nts) to whole transcripts | |
297 | |
298 - 3' bias | |
299 | |
300 the ratio of coverage median of 3’ regions (defined as the last 100 nts) to whole transcripts | |
301 | |
302 - 5’-3’ bias | |
303 | |
304 the ratio of 5' bias to 3' bias. | |
305 | |
306 *Junction analysis* | |
307 | |
308 Lists the total number of reads with splice junctions and the relative | |
309 frequency of the (up to) 10 most frequent junction sequences. | |
310 | |
311 | |
312 **Plots** | |
313 | |
314 *Reads Genomic Origin* | |
315 | |
316 A pie chart showing how many read alignments fall into exonic, intronic and | |
317 intergenic regions. | |
318 | |
319 *Coverage Profile Along Genes (Total)* | |
320 | |
321 This plot shows the mean coverage profile of all genes with non-zero | |
322 overall coverage. | |
323 | |
324 *Coverage Profile Along Genes (Low)* | |
325 | |
326 The plot shows the mean coverage profile of the 500 genes with the lowest, but non-zero, overall coverage. | |
327 | |
328 *Coverage Profile Along Genes (High)* | |
329 | |
330 The plot shows the mean coverage profile of the 500 genes with the highest | |
331 overall coverage. | |
332 | |
333 *Coverage Histogram (0-50x)* | |
334 | |
335 Coverage of genes from 0 to 50x. Genes with >50x coverage are added to the 50x | |
336 bin. | |
337 | |
338 *Junction Analysis* | |
339 | |
340 This pie chart shows an analysis of the splice junctions observed in the | |
341 alignments. It consists of three categories: | |
342 | |
343 - Known | |
344 | |
345 observed splice junctions both sides of which are in line with the genome | |
346 annotation data | |
347 | |
348 - Partly known | |
349 | |
350 observed splice junctions for which only one junction side can be deduced | |
351 from the genome annotation data | |
352 | |
353 - Novel | |
354 | |
355 observed splice junctions not predicted on either side by the genome | |
356 annotation data | |
357 | |
358 | |
359 Raw data | |
360 -------- | |
361 | |
362 This is a *Collection* of 4 individual datasets. | |
363 | |
364 Of these, the *rnaseq_qc_results* dataset provides a plain-text version of the | |
365 *HTML report* *Summary* section. | |
366 | |
367 The other 3 datasets hold the tabular raw data underlying the three coverage | |
368 profile plots in the *HTML Report*. | |
369 | |
370 | |
371 Counts data | |
372 ----------- | |
373 | |
374 Optional. This is a 2-column tabular dataset of read or fragment counts | |
375 (depending on the chosen *Counting mode*) per annotated gene. The first column | |
376 lists the gene identifiers found in the *Genome annotation data*, the second | |
377 the associated counts. | |
378 | |
379 This dataset represents valid (single-sample) input for the QualiMap Counts QC | |
380 tool. | |
381 ]]></help> | |
382 <expand macro="citations"/> | |
383 </tool> |