Mercurial > repos > iuc > rnaquast
comparison rna_quast.xml @ 5:f89e3c318453 draft
planemo upload for repository https://git.ufz.de/lehmanju/rnaquast commit c633f5c634128e3c81ab48e94df6f703dd005c46
author | iuc |
---|---|
date | Wed, 07 Jun 2023 12:02:03 +0000 |
parents | f9f2ad782d8f |
children | 8e66f695d859 |
comparison
equal
deleted
inserted
replaced
4:f9f2ad782d8f | 5:f89e3c318453 |
---|---|
1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | 1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 <description>A quality assessment tool for De Novo transcriptome assemblies</description> | 2 <description>A quality assessment tool for De Novo transcriptome assemblies</description> |
3 <xrefs> | |
4 <xref type="bio.tools">rnaQUAST</xref> | |
5 </xrefs> | |
6 <macros> | 3 <macros> |
7 <token name="@TOOL_VERSION@">2.2.1</token> | 4 <import>macros.xml</import> |
8 <token name="@VERSION_SUFFIX@">1</token> | |
9 <xml name="element_matching_line" token_name="" token_expression=""> | |
10 <element name="@NAME@"> | |
11 <assert_contents> | |
12 <has_line_matching expression="@EXPRESSION@" /> | |
13 </assert_contents> | |
14 </element> | |
15 </xml> | |
16 <xml name="element_has_text" token_name="" token_text=""> | |
17 <element name="@NAME@"> | |
18 <assert_contents> | |
19 <has_text text="@TEXT@" /> | |
20 </assert_contents> | |
21 </element> | |
22 </xml> | |
23 <xml name="details_output_test" token_assembler=""> | |
24 <element name="@ASSEMBLER@"> | |
25 <element name="5000%-assembled.list"> | |
26 <assert_contents> | |
27 <has_n_lines n="0" /> | |
28 </assert_contents> | |
29 </element> | |
30 <element name="9500%-assembled.list"> | |
31 <assert_contents> | |
32 <has_n_lines n="0" /> | |
33 </assert_contents> | |
34 </element> | |
35 <expand macro="element_matching_line" name="alignment_metrics" expression="\s*== ALIGNMENT METRICS \(calculated with reference genome but without gene database\) ==\s*" /> | |
36 <expand macro="element_matching_line" name="alignment_multiplicity" expression="unaligned=\d+ aligned=\d+ alignments=\d+\s*" /> | |
37 <expand macro="element_matching_line" name="alignments_per_isoform" expression="avg=[\d.]+\s*" /> | |
38 <expand macro="element_matching_line" name="basic_metrics" expression="\s*== BASIC TRANSCRIPTS METRICS \(calculated without reference genome and gene database\) ==\s*" /> | |
39 <expand macro="element_matching_line" name="block_length" expression="avg=[\d.]+\s*" /> | |
40 <expand macro="element_matching_line" name="blocks_per_alignment" expression="avg=[\d.]+\s+tot=\d+\s*" /> | |
41 <expand macro="element_matching_line" name="database_metrics" expression="\s*== GENE DATABASE METRICS ==\s*" /> | |
42 <expand macro="element_matching_line" name="misassemblies" expression="\s*== ALIGNMENT METRICS FOR MISASSEMBLED \(CHIMERIC\) TRANSCRIPTS \(calculated with reference genome or with gene database\) ==\s*" /> | |
43 <expand macro="element_matching_line" name="mismatch_rate" expression="avg=[\d.]+\s+tot=\d+\s*" /> | |
44 <expand macro="element_matching_line" name="sensitivity" expression="\s*== ASSEMBLY COMPLETENESS \(SENSITIVITY\) ==\s*" /> | |
45 <expand macro="element_matching_line" name="specificity" expression="\s*== ASSEMBLY SPECIFICITY ==\s*" /> | |
46 <expand macro="element_matching_line" name="transcript_length" expression="avg=[\d.]+\s*" /> | |
47 <expand macro="element_matching_line" name="x-aligned" expression="avg=[\d.]+\s*" /> | |
48 <expand macro="element_matching_line" name="x-assembled" expression="avg=[\d.]+\s*" /> | |
49 <expand macro="element_matching_line" name="x-assembled_exons" expression="avg=[\d.]+\s*" /> | |
50 <expand macro="element_matching_line" name="x-covered" expression="avg=[\d.]+\s*" /> | |
51 <expand macro="element_matching_line" name="x-covered_exons" expression="avg=[\d.]+\s*" /> | |
52 <expand macro="element_matching_line" name="x-matched" expression="avg=[\d.]+\s*" /> | |
53 <expand macro="element_matching_line" name="x-matched_blocks" expression="avg=[\d.]+\s*" /> | |
54 </element> | |
55 </xml> | |
56 | |
57 <xml name="txt_output_test" token_assembler=""> | |
58 <output name="short_report_txt"> | |
59 <assert_contents> | |
60 <has_text text="SHORT SUMMARY REPORT" /> | |
61 </assert_contents> | |
62 </output> | |
63 </xml> | |
64 <xml name="tex_output_test" token_assembler=""> | |
65 <output name="short_report_tex"> | |
66 <assert_contents> | |
67 <has_text text="Short summary report" /> | |
68 <has_text text="end{document}" /> | |
69 </assert_contents> | |
70 </output> | |
71 </xml> | |
72 <xml name="tsv_output_test" token_assembler=""> | |
73 <output name="short_report_tsv"> | |
74 <assert_contents> | |
75 <has_line_matching expression="^METRICS/TRANSCRIPTS\t.+$" /> | |
76 </assert_contents> | |
77 </output> | |
78 </xml> | |
79 <xml name="pdf_output_test" token_assembler=""> | |
80 <output name="short_report_pdf"> | |
81 <assert_contents> | |
82 <has_text text="rnaQUAST short report" /> | |
83 </assert_contents> | |
84 </output> | |
85 </xml> | |
86 </macros> | 5 </macros> |
87 <requirements> | 6 <expand macro='xrefs'/> |
88 <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement> | 7 <expand macro='requirements'/> |
89 </requirements> | |
90 <stdio> | 8 <stdio> |
91 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" /> | 9 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" /> |
92 </stdio> | 10 </stdio> |
93 <command detect_errors="exit_code"><![CDATA[ | 11 <command detect_errors="exit_code"><![CDATA[ |
94 #import re | 12 mkdir -p './complete_reports/' && |
95 #for $i in $transcripts | 13 mkdir -p './fasta_files/' && |
14 #import os, re, glob | |
15 #for $i in $transcripts | |
96 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && | 16 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' && |
97 #end for | 17 #end for |
98 #if $r | 18 #if $reference |
99 #for $rf in $r | 19 #for $rf in $reference |
100 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' && | 20 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' && |
101 #end for | 21 #end for |
102 #end if | 22 #end if |
103 #if $gene_coordinates.use_gtf == "true" | 23 #if $gene_coordinates.selector == "true" |
104 #for $g in $gene_coordinates.gtf | 24 #for $g in $gene_coordinates.gtf |
105 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' && | 25 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' && |
106 #end for | 26 #end for |
107 #end if | 27 #end if |
108 mkdir outputdir && | 28 mkdir outputdir && |
109 rnaQUAST.py | 29 rnaQUAST.py |
110 --threads \${GALAXY_SLOTS:-1} | 30 --threads \${GALAXY_SLOTS:-8} |
111 --transcripts | 31 --transcripts |
112 #for $i in $transcripts | 32 #for $i in $transcripts |
113 '${re.sub('[^\w\-.]', '_', i.element_identifier)}' | 33 '${re.sub('[^\w\-.]', '_', i.element_identifier)}' |
114 #end for | 34 #end for |
115 $strand_specific | 35 #if $reads_option.selector == 'paired' |
116 #if $r | 36 --left_reads '${reads_option.forward_reads}' |
37 --right_reads '${reads_option.reverse_reads}' | |
38 #else if $reads_option.selector == 'single' | |
39 --single_reads '${reads_option.single_reads}' | |
40 #end if | |
41 $advanced_options.strand_specific | |
42 #if $reads_alignment | |
43 --reads_alignment '${reads_alignment}' | |
44 #end if | |
45 #if $reference | |
117 -r | 46 -r |
118 #for $rf in $r | 47 #for $rf in $reference |
119 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' | 48 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' |
120 #end for | 49 #end for |
121 #end if | 50 #end if |
122 #if $gene_coordinates.use_gtf == "true" | 51 #if $gene_coordinates.selector == "true" |
123 --gtf | 52 --gtf |
124 #for $g in $gene_coordinates.gtf | 53 #for $g in $gene_coordinates.gtf |
125 '${re.sub('[^\w\-.]', '_', g.element_identifier)}' | 54 '${re.sub('[^\w\-.]', '_', g.element_identifier)}' |
126 #end for | 55 #end for |
127 $gene_coordinates.disable_infer_genes | 56 $gene_coordinates.disable_infer_genes |
128 $gene_coordinates.disable_infer_transcripts | 57 $gene_coordinates.disable_infer_transcripts |
129 #end if | 58 #end if |
130 $prokaryote | 59 $advanced_options.prokaryote |
131 --min_alignment '$min_alignment' | 60 --min_alignment $advanced_options.min_alignment |
132 #if "pdf" not in $out_sr and "plots" not in $out_add | 61 $advanced_options.blat |
62 | |
63 #if "pdf" not in $output_options.out_sr | |
133 --no_plots | 64 --no_plots |
134 #end if | 65 #end if |
135 $blat | 66 #if $use_busco.selector == 'true' |
136 #if $busco_option.busco == 'true' | 67 --busco |
137 --busco $busco_option.lineage | 68 #if $use_busco.lineage_conditional.selector == 'cached': |
138 #end if | 69 '${use_busco.lineage_conditional.cached_db.fields.path}' |
139 ##$gene_mark | 70 #else |
140 $meta | 71 $use_busco.lineage |
141 --lower_threshold $lower_threshold | 72 #end if |
142 --upper_threshold $upper_threshold | 73 #end if |
74 ## $advanced_options.gene_mark | |
75 $advanced_options.meta | |
76 --lower_threshold $advanced_options.lower_threshold | |
77 --upper_threshold $advanced_options.upper_threshold | |
143 -o outputdir | 78 -o outputdir |
144 | 79 |
145 && mkdir details | 80 #if 'gz' in $output_options.out_add |
146 | 81 && tar -czvf results.tar.gz './outputdir' |
147 ## move per outputs that are generated for each input (outputdir/ASSEMBLER_output) | 82 #end if |
148 ## to a joint dir (details) to make them discoverable | 83 |
149 ## also remove "ASSEMBLER." prefixes from files (otherwise the test macros don't work) | 84 #if len($transcripts) == 1 |
150 #for $i in $transcripts | 85 #set $path = "/".join(['outputdir',($transcripts[0].element_identifier).split(".")[0]]) + "_output" |
151 #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0] | 86 && mv '${path}' './results' |
152 && | 87 ## rename .list files to .txt files to make them detectable |
153 (for f in \$(find 'outputdir/'$basename'_output' -type f); | 88 && find './results/' -name "*.list" -exec mv {} {}.txt \; |
154 do | 89 && true |
155 d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) && | 90 && printf "************ METRICS/TRANSCRIPTS ***************\n" > stats.txt |
156 mv \$f details/"\$d"_____"\$(basename \$f | sed 's/$basename\.//')"; | 91 && for file_name in ./results/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt |
157 done) | 92 && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt; |
158 #end for | 93 done |
159 | 94 && cat stats.txt > $stats |
160 ## rename .list files to .txt files to make them detectable (format detection by extension) | 95 #if $gene_coordinates.selector == 'true' and $reference |
161 ## the final `true` seems needed since otherwise the `;` at the end is swallowed | 96 && mv ./results/*fasta ./fasta_files/ |
162 && find details/ -name "*.list" -exec mv {} {}.txt \; | 97 #end if |
163 && true | 98 #else |
99 && mkdir -p './results/' | |
100 #if $gene_coordinates.selector == 'true' and $reference | |
101 #for $i, $transcript in enumerate($transcripts) | |
102 #set $path = "/".join(['outputdir',($transcripts[$i].element_identifier).split(".")[0]]) + "_output" | |
103 && rm -r ./results | |
104 && cp -r $path './results' | |
105 && mv ./results/*fasta './fasta_files/' | |
106 #end for | |
107 #end if | |
108 && find './outputdir/comparison_output' -name "*.list" -exec mv {} {}.txt \; | |
109 && true | |
110 && printf "************ COMPARISON METRICS ***************\n" > stats.txt | |
111 && for file_name in ./outputdir/comparison_output/*txt; do printf "\n************ \$file_name ************\n" >> stats.txt | |
112 && sed 's/^ ==.*/&\n/' \$file_name | tail -q -n +2 "\$file_name" >> stats.txt; done | |
113 && cat stats.txt > $stats | |
114 #end if | |
164 ]]> </command> | 115 ]]> </command> |
165 <inputs> | 116 <inputs> |
166 <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/> | 117 <param argument="--transcripts" type="data" format="fasta" multiple="true" label="Transcripts" help="File(s) with transcripts in FASTA format."/> |
167 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific" | 118 <conditional name="reads_option"> |
168 help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/> | 119 <param name="selector" type="select" label="Single-end or paired-end reads"> |
169 <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." /> | 120 <option value="" selected="true">Disabled-end</option> |
121 <option value="single" selected="true">Single-end</option> | |
122 <option value="paired">Paired-end (as individual datasets)</option> | |
123 </param> | |
124 <when value=""/> | |
125 <when value="single"> | |
126 <param format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" name="single_reads" type="data" label="RNA-Seq FASTQ/FASTA file"/> | |
127 </when> | |
128 <when value="paired"> | |
129 <param name="forward_reads" format="fastq,fastq.gz,fastqsanger ,fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, forward reads"/> | |
130 <param name="reverse_reads" format="fastq,fastq.gz,fastqsanger, fastqsanger.gz" type="data" label="RNA-Seq FASTQ/FASTA file, reverse reads"/> | |
131 </when> | |
132 </conditional> | |
133 <param argument="--reference" type="data" format="fasta" label="Reference genome" multiple="true" optional="true" help="File with reference genome containing all chromosomes/scaffolds in FASTA format." /> | |
170 <conditional name="gene_coordinates"> | 134 <conditional name="gene_coordinates"> |
171 <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl."> | 135 <param name="selector" type="select" label="Genome annotation" help="Genome annotation file. We recommend to use files downloaded from GENCODE or Ensembl."> |
172 <option value="true" selected="true">Yes</option> | 136 <option value="true">Enabled</option> |
173 <option value="false">No</option> | 137 <option value="false" selected="true">Disabled</option> |
174 </param> | 138 </param> |
175 <when value="true"> | 139 <when value="true"> |
176 <param name="gtf" argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" /> | 140 <param argument="--gtf" type="data" format="gtf,gff,gff3" multiple="true" label="GTF/GFF file" /> |
177 <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?" | 141 <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" Disable infer genes" |
178 help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/> | 142 help="Use this option if your GTF file already contains genes records, otherwise gffutils will fix it. Note that gffutils may work for quite a long time"/> |
179 <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/> | 143 <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="Disable infer transcripts" help="Use this option if your GTF file already contains transcripts records, otherwise gffutils will fix it."/> |
180 </when> | 144 </when> |
181 <when value="false"> | 145 <when value="false"> |
182 </when> | 146 </when> |
183 </conditional> | 147 </conditional> |
184 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?" help="Use this option if the genome is prokaryotic."/> | 148 <param argument="--reads_alignment" type="data" format="sam" label="Aligned reads to reference genome" optional="true" help="File with read alignments to the reference genome" /> |
185 <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used" help="Default value is 50"/> | 149 <conditional name="use_busco"> |
186 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" help="Blat is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " /> | 150 <param argument="selector" type="select" label="Run BUSCO" help="BUSCO allows to detect core genes in the assembled transcripts"> |
187 <conditional name="busco_option"> | |
188 <param argument="--busco" type="select" label="Run BUSCO tool?" help="BUSCO allows to detect core genes in the assembled transcripts"> | |
189 <option value="false">Disabled</option> | 151 <option value="false">Disabled</option> |
190 <option value="true">Enabled</option> | 152 <option value="true">Enabled</option> |
191 </param> | 153 </param> |
192 <when value="false"/> | 154 <when value="false"/> |
193 <when value="true"> | 155 <when value="true"> |
194 <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO"> | 156 <conditional name="lineage_conditional"> |
195 <option value="metazoa">Metazoa</option> | 157 <param name="selector" type="select" label="Lineage data source"> |
196 <option value="eukaryota">Eukaryota</option> | 158 <option value="download">Download lineage data</option> |
197 <option value="arthropoda">Arthropoda</option> | 159 <option value="cached" selected="true">Use cached lineage data</option> |
198 <option value="vertebrata">Vertebrata</option> | 160 </param> |
199 <option value="fungi">Fungi</option> | 161 <when value="cached"> |
200 <option value="bacteria">Bacteria</option> | 162 <param name="cached_db" label="Cached database with lineage" type="select"> |
201 </param> | 163 <options from_data_table="busco_database"> |
164 <validator message="No BUSCO database is available" type="no_options" /> | |
165 </options> | |
166 </param> | |
167 </when> | |
168 <when value="download"> | |
169 <param name="lineage" type="select" label="Lineage" help="Select a lineage for using BUSCO"> | |
170 <option value="metazoa">Metazoa</option> | |
171 <option value="eukaryota">Eukaryota</option> | |
172 <option value="arthropoda">Arthropoda</option> | |
173 <option value="vertebrata">Vertebrata</option> | |
174 <option value="fungi">Fungi</option> | |
175 <option value="bacteria">Bacteria</option> | |
176 </param> | |
177 </when> | |
178 </conditional> | |
202 </when> | 179 </when> |
203 </conditional> | 180 </conditional> |
204 <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/--> | 181 <section name="advanced_options" title="Advanced options" > |
205 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" /> | 182 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific RNA-seq data" |
206 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." /> | 183 help="Set if transcripts were assembled using strand-specific RNA-Seq data in order to benefit from knowing whether the transcript originated from the + or - strand"/> |
207 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." /> | 184 <param argument="--min_alignment" type="integer" min="0" value="50" label="Minimal alignment length to be used" help="Default value is 50"/> |
208 <param name="out_sr" type="select" multiple="true" label="Short report formats"> | 185 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT instead of GMAP" help="BLAT is especially useful for aligning long sequences and gapped mapping, which cannot be performed properly by other fast sequence mappers designed for short reads. " /> |
209 <option value="tsv" selected="true">tabular</option> | 186 <!-- GeneMarkST is not in Bioconda --> |
210 <option value="txt">txt</option> | 187 <!--param argument="-\-gene_mark" type="boolean" truevalue="-\-gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?" |
211 <option value="tex">tex</option> | 188 help="GeneMarkS-T allows to predict genes in the assembled transcripts without reference genome"/--> |
212 <option value="pdf" selected="true">pdf</option> | 189 <param argument="--meta" type="boolean" truevalue="--meta" falsevalue="" checked="false" label="Meta Transcriptome" help="Run quality assessment for meta-transcriptome assemblies" /> |
213 </param> | 190 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x-assembled/covered/matched metrics." /> |
214 <param name="out_add" type="select" multiple="true" label="Additional outputs"> | 191 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x-assembled/covered/matched metrics." /> |
215 <option value="logs">Logs</option> | 192 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Prokaryotic organism(s)" help="Use this option if the genome is prokaryotic"/> |
216 <option value="plots" selected="true">Plots (only for n>1)</option> | 193 </section> |
217 <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option> | 194 <section name="output_options" title="Output options" expanded="true"> |
218 <option value="details" selected="true">Details per Chromosomes/scaffolds file</option> | 195 <param name="out_sr" type="select" multiple="true" display="checkboxes" label="Short report formats"> |
219 <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option> | 196 <option value="tabular">Tabular</option> |
220 </param> | 197 <option value="tex">TeX</option> |
198 <option value="pdf" selected="true">PDF</option> | |
199 </param> | |
200 <param name="out_add" type="select" label="Additional outputs" multiple="true" display="checkboxes"> | |
201 <option value="complete">Complete report</option> | |
202 <option value="fasta" >FASTA files</option> | |
203 <option value="logs">Logs</option> | |
204 <option value="gz">Compressed output folder</option> | |
205 </param> | |
206 </section> | |
221 </inputs> | 207 </inputs> |
222 | |
223 <outputs> | 208 <outputs> |
224 <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf"> | 209 <data name="stats" format="txt" label="${tool.name} on ${on_string}: complete report"> |
225 <filter>"pdf" in out_sr</filter> | 210 <filter>output_options['out_add'] and "complete" in output_options['out_add']</filter> |
226 </data> | |
227 <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt"> | |
228 <filter>"txt" in out_sr</filter> | |
229 </data> | |
230 <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex"> | |
231 <filter>"tex" in out_sr</filter> | |
232 </data> | |
233 <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv"> | |
234 <filter>"tsv" in out_sr</filter> | |
235 </data> | 211 </data> |
236 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs"> | 212 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs"> |
237 <discover_datasets ext="txt" pattern="(?P<name>.+)\.log" directory="outputdir/logs/" visible="false" /> | 213 <discover_datasets ext="txt" pattern="(?P<name>.+)\.log" directory="outputdir/logs" visible="false" /> |
238 <filter>"logs" in out_add</filter> | 214 <filter>output_options['out_add'] and "logs" in output_options['out_add']</filter> |
239 </collection> | 215 </collection> |
240 <!-- note the output filter of the next two outputs checks if there is | 216 <collection name="fasta_files" type="list" label="${tool.name} on ${on_string}: FASTA files"> |
241 more than 1 input for transcripts (for 1 its a HDA, for more list or HDAs) --> | 217 <discover_datasets ext="fasta" pattern="(?P<name>.+)\.fasta" directory="fasta_files" visible="false" /> |
242 <collection name="comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots"> | 218 <filter>output_options['out_add'] and "fasta" in output_options['out_add']</filter> |
243 <discover_datasets ext="png" pattern="(?P<name>.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true" /> | 219 <filter>gene_coordinates['selector'] == 'true'</filter> |
244 <filter> isinstance(transcripts, list) and "plots" in out_add</filter> | 220 <filter>reference</filter> |
245 </collection> | 221 </collection> |
246 <collection name="comparison" type="list" label="${tool.name} on ${on_string}: comparison"> | 222 <data name="compressed_files" format="tgz" label="${tool.name} on ${on_string}: compressed results folder" from_work_dir="results.tar.gz"> |
247 <discover_datasets ext="txt" pattern="(?P<name>.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" /> | 223 <filter>output_options['out_add'] and "gz" in output_options['out_add']</filter> |
248 <filter> isinstance(transcripts, list) and "comparison" in out_add</filter> | 224 </data> |
249 </collection> | 225 <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: short report (pdf)" from_work_dir="outputdir/short_report.pdf"> |
250 <collection name="details" type="list:list" label="${tool.name} on ${on_string}: detailed output"> | 226 <filter>output_options['out_sr'] and "pdf" in output_options['out_sr']</filter> |
251 <discover_datasets pattern="(?P<identifier_0>.+)_____(?P<identifier_1>.+)\.(?P<ext>txt)" directory="details/" visible="false" /> | 227 </data> |
252 <filter>"details" in out_add</filter> | 228 <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: short report (tex)" from_work_dir="outputdir/short_report.tex"> |
253 </collection> | 229 <filter>output_options['out_sr'] and "tex" in output_options['out_sr']</filter> |
254 <collection name="details_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots"> | 230 </data> |
255 <discover_datasets pattern="(?P<identifier_0>.+)_____(?P<identifier_1>.+)\.(?P<ext>png)" directory="details/" visible="false" /> | 231 <data name="short_report_tabular" format="tabular" label="${tool.name} on ${on_string}: short report (tabular)" from_work_dir="outputdir/short_report.tsv"> |
256 <filter>"details_plots" in out_add</filter> | 232 <filter>output_options['out_sr'] and "tabular" in output_options['out_sr']</filter> |
257 </collection> | 233 </data> |
258 </outputs> | 234 </outputs> |
259 <tests> | 235 <tests> |
260 <test expect_num_outputs="7"> | 236 <!-- Test 01: Minimum input, tabular output --> |
261 <param name="transcripts" value="idba.fasta,Trinity.fasta" ftype="fasta" /> | 237 <test expect_num_outputs="1"> |
262 <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" /> | 238 <param name="transcripts" value="transcriptome01.fasta"/> |
263 <conditional name="gene_coordinates"> | 239 <section name="output_options"> |
264 <param name="use_gtf" value="true" /> | 240 <param name="out_sr" value="tabular"/> |
265 <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" /> | 241 </section> |
266 <param name="disable_infer_genes" value="true" /> | 242 <output name="short_report_tabular" file="test_01_short_report.tab"/> |
267 <param name="disable_infer_transcripts" value="true" /> | 243 </test> |
268 </conditional> | 244 <!-- Test 02: Transcriptome reference,single read, txt output--> |
269 <param name="out_sr" value="txt,tex,tsv" /> | 245 <test expect_num_outputs="1"> |
270 <param name="out_add" value="logs,comparison,plots,details" /> | 246 <param name="transcripts" value="transcriptome01.fasta"/> |
271 <expand macro="txt_output_test" /> | 247 <section name="output_options"> |
272 <expand macro="tex_output_test" /> | 248 <param name="out_sr" value="tabular"/> |
273 <expand macro="tsv_output_test" /> | 249 </section> |
274 <output_collection name="comparison_png" type="list" count="15" /> | 250 <conditional name="reads_option"> |
275 <output_collection name="comparison" type="list" count="19" /> | 251 <param name="selector" value="single"/> |
276 <output_collection name="list_logs" type="list" count="8" /> | 252 <param name="single_reads" value="single_end.fastq.gz"/> |
277 <output_collection name="details" type="list:list" count="2"> | 253 </conditional> |
278 <expand macro="details_output_test" assembler="Trinity" /> | 254 <output name="short_report_tabular"> |
279 <expand macro="details_output_test" assembler="idba" /> | 255 <assert_contents> |
256 <has_text text="Transcripts" /> | |
257 <has_size value="95" delta="5"/> | |
258 </assert_contents> | |
259 </output> | |
260 </test> | |
261 <!-- Test 03: Transcriptome reference and annotation, txt output--> | |
262 <test expect_num_outputs="1"> | |
263 <param name="transcripts" value="transcriptome01.fasta"/> | |
264 <conditional name="gene_coordinates"> | |
265 <param name="selector" value="true"/> | |
266 <param name="gtf" value="reference.gtf"/> | |
267 </conditional> | |
268 <section name="output_options"> | |
269 <param name="out_sr" value="tabular"/> | |
270 </section> | |
271 <conditional name="reads_option"> | |
272 <param name="selector" value="single"/> | |
273 <param name="single_reads" value=""/> | |
274 </conditional> | |
275 <output name="short_report_tabular" file="test_03_short_report.tab"/> | |
276 </test> | |
| 277 <!-- Test 04: Transcriptome, annotation and single-end reads, tabular output --> |
278 <test expect_num_outputs="1"> | |
279 <param name="transcripts" value="transcriptome01.fasta"/> | |
280 <conditional name="gene_coordinates"> | |
281 <param name="selector" value="true"/> | |
282 <param name="gtf" value="reference.gtf"/> | |
283 </conditional> | |
284 <section name="output_options"> | |
285 <param name="out_sr" value="tabular"/> | |
286 </section> | |
287 <conditional name="reads_option"> | |
288 <param name="selector" value="single"/> | |
289 <param name="single_reads" value="single_end.fastq.gz"/> | |
290 </conditional> | |
291 <output name="short_report_tabular"> | |
292 <assert_contents> | |
293 <has_text text="Transcripts" /> | |
294 <has_size value="140" delta="5"/> | |
295 </assert_contents> | |
296 </output> | |
297 </test> | |
298 <!-- Test 05: Transcriptome reference, annotation and paired-end read mapping (distinct forward/reverse mate files), txt output--> | |
299 <test expect_num_outputs="1"> | |
300 <param name="transcripts" value="transcriptome01.fasta"/> | |
301 <conditional name="gene_coordinates"> | |
302 <param name="selector" value="true"/> | |
303 <param name="gtf" value="reference.gtf"/> | |
304 </conditional> | |
305 <section name="output_options"> | |
306 <param name="out_sr" value="tabular"/> | |
307 </section> | |
308 <conditional name="reads_option"> | |
309 <param name="selector" value='paired'/> | |
310 <param name="forward_reads" value="input_F.fastqsanger"/> | |
311 <param name="reverse_reads" value="input_R.fastqsanger"/> | |
312 </conditional> | |
313 <output name="short_report_tabular"> | |
314 <assert_contents> | |
315 <has_text text="Transcripts" /> | |
316 <has_size value="140" delta="5"/> | |
317 </assert_contents> | |
318 </output> | |
319 </test> | |
320 <!-- Test 06: Transcriptome reference, annotation, mapping and BUSCO, txt output--> | |
321 <test expect_num_outputs="1"> | |
322 <param name="transcripts" value="transcriptome01.fasta"/> | |
323 <conditional name="gene_coordinates"> | |
324 <param name="selector" value="true"/> | |
325 <param name="gtf" value="reference.gtf"/> | |
326 </conditional> | |
327 <conditional name="reads_option"> | |
328 <param name="selector" value='paired'/> | |
329 <param name="forward_reads" value="input_F.fastqsanger"/> | |
330 <param name="reverse_reads" value="input_R.fastqsanger"/> | |
331 </conditional> | |
332 <section name="output_options"> | |
333 <param name="out_sr" value="tabular"/> | |
334 </section> | |
335 <conditional name="use_busco"> | |
336 <param name="selector" value="true"/> | |
337 <conditional name="lineage_conditional"> | |
338 <param name="selector" value="cached"/> | |
339 <param name="cached_db" value="busco-demo-db-20230328"/> | |
340 </conditional> | |
341 </conditional> | |
342 <output name="short_report_tabular"> | |
343 <assert_contents> | |
344 <has_text text="Transcripts" /> | |
345 <has_size value="140" delta="5"/> | |
346 </assert_contents> | |
347 </output> | |
348 | |
349 </test> | |
350 <!-- Test 07: Transcriptome reference, annotation, mapping and BUSCO, additional outputs--> | |
351 <test expect_num_outputs="4"> | |
352 <param name="transcripts" value="transcriptome01.fasta"/> | |
353 <conditional name="gene_coordinates"> | |
354 <param name="selector" value="true"/> | |
355 <param name="gtf" value="reference.gtf"/> | |
356 </conditional> | |
357 <param name="reference" value="reference.fasta"/> | |
358 <conditional name="reads_option"> | |
359 <param name="selector" value='paired'/> | |
360 <param name="forward_reads" value="input_F.fastqsanger"/> | |
361 <param name="reverse_reads" value="input_R.fastqsanger"/> | |
362 </conditional> | |
363 <conditional name="use_busco"> | |
364 <param name="selector" value="true"/> | |
365 <conditional name="lineage_conditional"> | |
366 <param name="selector" value="cached"/> | |
367 <param name="cached_db" value="busco-demo-db-20230328"/> | |
368 </conditional> | |
369 </conditional> | |
370 <section name="output_options"> | |
371 <param name="out_sr" value="pdf,tabular"/> | |
372 <param name="out_add" value="fasta,gz"/> | |
373 </section> | |
374 <output_collection name="fasta_files" type="list" count="7"> | |
375 <element name="transcriptome01.paralogs" file="test_07_paralogs.fasta" ftype="fasta"/> | |
280 </output_collection> | 376 </output_collection> |
281 </test> | 377 <output name="short_report_pdf" file="test_07_short_report.pdf" ftype="pdf" compare="sim_size" delta="1000"/> |
378 <output name="short_report_tabular" file="test_07_short_report.tab" ftype="tabular"/> | |
379 <output name="compressed_files" ftype="tgz"> | |
380 <assert_contents> | |
381 <has_size value="281260" delta="250"/> | |
382 </assert_contents> | |
383 </output> | |
384 </test> | |
385 <!-- Test 08: Multiple inputs--> | |
282 <test expect_num_outputs="6"> | 386 <test expect_num_outputs="6"> |
283 <param name="transcripts" value="Trinity.fasta" ftype="fasta" /> | 387 <param name="transcripts" value="transcriptome01.fasta,transcriptome02.fasta"/> |
284 <conditional name="gene_coordinates"> | 388 <param name="reference" value="reference.fasta"/> |
285 <param name="use_gtf" value="false" /> | 389 <conditional name="gene_coordinates"> |
286 </conditional> | 390 <param name="selector" value="true"/> |
287 <param name="min_alignment" value="30" /> | 391 <param name="gtf" value="reference.gtf"/> |
288 <param name="lower_threshold" value="45" /> | 392 </conditional> |
289 <param name="upper_threshold" value="95" /> | 393 <section name="output_options"> |
290 <param name="out_sr" value="txt,tex,tsv,pdf" /> | 394 <param name="out_sr" value="tabular,pdf"/> |
291 <param name="out_add" value="logs,details_plots" /> | 395 </section> |
292 | 396 <conditional name="use_busco"> |
293 <expand macro="pdf_output_test" /> | 397 <param name="selector" value="true"/> |
294 <expand macro="tex_output_test" /> | 398 <conditional name="lineage_conditional"> |
295 <expand macro="tsv_output_test" /> | 399 <param name="selector" value="cached"/> |
296 <expand macro="txt_output_test" /> | 400 <param name="cached_db" value="busco-demo-db-20230328"/> |
297 <output_collection name="list_logs" type="list"> | 401 </conditional> |
298 <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" /> | 402 </conditional> |
299 <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" /> | 403 <param name="out_add" value="complete,fasta,logs,gz"/> |
404 <conditional name="reads_option"> | |
405 <param name="selector" value="single"/> | |
406 <param name="single_reads" value="single_end.fastq.gz"/> | |
407 </conditional> | |
408 <output name="short_report_tabular" value="test_08_short_report.tab" ftype="tabular"/> | |
409 <output name="short_report_pdf" value="test_08_short_report.pdf" ftype="pdf"/> | |
410 <output name="stats" value="test_08_complete_report.tab" ftype="txt" lines_diff="6" /> | |
411 <output_collection name="fasta_files" type="list" count="14"> | |
412 <element name="transcriptome01.paralogs" file="test_08_paralogs.fasta" ftype="fasta"/> | |
300 </output_collection> | 413 </output_collection> |
301 <output_collection name="details_png" type="list:list" count="1"> | 414 <output_collection name="list_logs" type="list" count="14"> |
302 <element name="Trinity"> | 415 <element name="STAR.out" ftype="txt"> |
303 <expand macro="element_has_text" name="Nx" text="PNG" /> | 416 <assert_contents> |
304 <expand macro="element_has_text" name="transcript_length" text="PNG" /> | 417 <has_text text="STAR --runThreadN"/> |
418 <has_text text="finished successfully"/> | |
419 </assert_contents> | |
420 </element> | |
421 <element name="gmap_build.out" ftype="txt"> | |
422 <assert_contents> | |
423 <has_text text="No alternate scaffolds observed"/> | |
424 </assert_contents> | |
425 </element> | |
426 <element name="rnaQUAST" ftype="txt"> | |
427 <assert_contents> | |
428 <has_text text="THE QUALITY OF TRANSCRIPTOME ASSEMBLY DONE"/> | |
429 <has_text text="Thank you for using rnaQUAST!"/> | |
430 </assert_contents> | |
305 </element> | 431 </element> |
306 </output_collection> | 432 </output_collection> |
307 </test> | 433 </test> |
308 <test expect_num_outputs="6"> | 434 |
309 <param name="transcripts" value="Trinity.fasta" ftype="fasta" /> | |
310 <conditional name="gene_coordinates"> | |
311 <param name="use_gtf" value="false" /> | |
312 </conditional> | |
313 <param name="min_alignment" value="30" /> | |
314 <param name="lower_threshold" value="45" /> | |
315 <param name="upper_threshold" value="95" /> | |
316 <param name="out_sr" value="txt,tex,tsv,pdf" /> | |
317 <param name="out_add" value="logs,details_plots" /> | |
318 <conditional name="busco_option"> | |
319 <param name="busco" value="true"/> | |
320 <param name="lineage" value="metazoa"/> | |
321 </conditional> | |
322 <expand macro="pdf_output_test" /> | |
323 <expand macro="tex_output_test" /> | |
324 <expand macro="tsv_output_test" /> | |
325 <expand macro="txt_output_test" /> | |
326 <output_collection name="list_logs" type="list"> | |
327 <expand macro="element_has_text" name="Trinity.GeneMarkS_T.err" text="" /> | |
328 <expand macro="element_matching_line" name="rnaQUAST" expression="Thank you for using rnaQUAST!" /> | |
329 </output_collection> | |
330 <output_collection name="details_png" type="list:list" count="1"> | |
331 <element name="Trinity"> | |
332 <expand macro="element_has_text" name="Nx" text="PNG" /> | |
333 <expand macro="element_has_text" name="transcript_length" text="PNG" /> | |
334 </element> | |
335 </output_collection> | |
336 <assert_command> | |
337 <has_text text="--busco metazoa"/> | |
338 </assert_command> | |
339 </test> | |
340 </tests> | 435 </tests> |
341 <help><![CDATA[ | 436 <help><![CDATA[ |
342 **What is rnaQUAST** | 437 |
343 - a quality assessment tool for de novo transcriptome assemblies | 438 .. class:: infomark |
344 - evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using reference genome and gene database | 439 |
345 - calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts | 440 **Purpose** |
346 | 441 |
347 **Using rnaQuast without reference** you won't get: | 442 rnaQUAST is a tool for evaluating RNA-Seq assemblies using reference genome and gene database. In addition, rnaQUAST is also capable |
348 | 443 of estimating gene database coverage by raw reads and de novo quality assessment. |
349 - x-assembled (Exons) | 444 |
350 - Alignments per Isoform | 445 .. class:: infomark |
351 - x-covered (Exons) | 446 |
352 - x-matched (Blocks) | 447 **rnaQUAST pipeline** |
353 - gmap build logs | 448 |
354 | 449 To evaluate quality of the assembled transcripts, rnaQUAST takes a reference genome in FASTA format and optionally its gene database in |
355 **Using rnaQuast with reference** you will get: | 450 GFF/GTF format. A user can provide a FASTA file with transcripts, which will be aligned to the given reference genome using GMAP |
356 - Reports | 451 or BLAT. The alignments are analyzed to calculate simple metrics and then are matched against the isoforms from the gene database in order |
357 - Logs | 452 to obtain statistics that represent completeness and correctness levels of the assembly. In addition, rnaQUAST is capable of estimating |
358 - Alignment/Basic Metrics | 453 gene database coverage by raw reads using STAR or TopHat2. For de novo quality assessment when reference genome and gene database are |
359 - Misassemblies/ Specificity/ Sensitivity | 454 unavailable, the transcripts are analyzed using BUSCO. |
360 - Alignment multiplicity | 455 |
361 - Block/ Transcript Length | 456 .. class:: infomark |
362 - Blocks per alignment | 457 |
363 - Mismatch rate | 458 **Metrics and alignment analysis** |
364 - x-aligned | 459 |
365 - Nx | 460 rnaQUAST calculates various metrics without using alignment information, e.g. length distribution and N50 of the assembled transcripts. |
366 - Blocks per alignment | 461 Additionally, rnaQUAST computes the following statistics for the gene database: the total number of genes and isoforms, isoform and exon |
367 - gmap build logs | 462 length distribution, average number of exons per gene, etc. |
368 | 463 |
369 **Using rnaQuast without gene coordinates** you won't get: | 464 To analyze transcripts' alignments, rnaQUAST first filters out short partial alignments (shorter than a user-defined threshold, default |
370 - x-assembled (Exons) | 465 value is 50 bp). Such short alignments are typically caused by genomic repeats and thus are ignored. Afterwards, rnaQUAST selects the |
371 - Alignments per Isoform | 466 best-scored spliced alignment for each transcript. If a transcript has more than one alignment with the highest score, it is reported |
372 - x-covered (Exons) | 467 as multiply aligned. Otherwise, it is considered to be uniquely aligned. If the best-scored alignment is discordant (e.g. the transcript |
373 - x-matched (Blocks) | 468 has partial alignments that are either mapped to different strands or to different chromosomes) the transcript is classified as misassembled. |
374 - gmap build logs | 469 Transcripts without misassemblies are analyzed to calculate such metrics as average transcript alignment fraction and mismatch rate. |
375 - Database Metrics | 470 |
376 - Alignment multiplicity | 471 For the simplicity of explanation, transcript is further referred to as a sequence generated by the assembler and isoform denotes a sequence |
377 - Mismatch rate | 472 from the gene database. rnaQUAST matches best-scored alignments of non-misassembled transcripts to the isoforms' coordinates and analyzes |
378 - NAx | 473 them to estimate how well the isoforms are covered by the assembly. rnaQUAST computes such metrics as database coverage (the total number |
379 - x-aligned | 474 of covered bases of all isoforms divided by the total length of all isoforms) and the number of 50%/95%-assembled isoforms. An isoform is |
380 **Using rnaQuast with gene coordinates** you will get: | 475 considered to be x%-assembled if it has at least x% covered by a single transcript. Vice versa, to evaluate how well the assembled |
381 - Reports | 476 transcripts are covered by the isoforms, rnaQUAST estimates the number of unannotated transcripts (that align to the genome, but do not |
382 - Logs | 477 match to any isoform) and the number of 50%/95%-matched transcripts (that have corresponding fraction mapped to an isoform). Indeed, the |
383 - Alignment/Basic Metrics | 478 thresholds described above (50% and 95%) can be varied by the user. |
384 - Misassemblies/Specificity/Sensitivity | 479 |
385 - Alignment multiplicity | 480 |
386 - Block/Transcript length | |
387 - Blocks per alignment | |
388 - Mismatch rate | |
389 - x-aligned | |
390 - Nx/NAx | |
391 - gmap build logs | |
392 - Database Metrics | |
393 - Alignment multiplicity | |
394 For more information, see the citations. | |
395 ]]> </help> | 481 ]]> </help> |
396 <citations> | 482 <citations> |
397 <citation type="doi">10.1093/bioinformatics/btw218 </citation> | 483 <citation type="doi">10.1093/bioinformatics/btw218 </citation> |
398 </citations> | 484 </citations> |
399 </tool> | 485 </tool> |