Mercurial > repos > iuc > quast
comparison quast.xml @ 4:0834c823d4b9 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/quast commit 2c56b5f07500507dad503aab6ec0619ec37f8b87
author | iuc |
---|---|
date | Mon, 12 Feb 2018 09:05:20 -0500 |
parents | 6fcbee531de6 |
children | 81df4950d65b |
comparison
equal
deleted
inserted
replaced
3:6fcbee531de6 | 4:0834c823d4b9 |
---|---|
1 <tool id="quast" name="Quast" version="4.5" > | 1 <tool id="quast" name="Quast" version="4.6.3" > |
2 <description>Genome assembly Quality</description> | 2 <description>Genome assembly Quality</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="4.5">quast</requirement> | 4 <requirement type="package" version="4.6.3">quast</requirement> |
5 </requirements> | 5 </requirements> |
6 <stdio> | 6 <stdio> |
7 <regex match="ERROR! exception caught!" | 7 <regex match="ERROR! exception caught!" |
8 source="both" | 8 source="both" |
9 level="fatal" | 9 level="fatal" |
32 #if $input_size: | 32 #if $input_size: |
33 --est-ref-size $input_size | 33 --est-ref-size $input_size |
34 #end if | 34 #end if |
35 --min-contig $min_contig | 35 --min-contig $min_contig |
36 -l | 36 -l |
37 #set names = ','.join( ['"'+ re.sub('[^\w\-_]', '_', str( $x.input.element_identifier))+'"' for $x in $files ]) | 37 #set names = ','.join( ['"'+ re.sub('[^\w\-_]', '_', str( $x.element_identifier))+'"' for $x in $input ]) |
38 | 38 |
39 $names | 39 $names |
40 --contig-thresholds $threshold_contig | 40 --contig-thresholds $threshold_contig |
41 #for $k in $files: | 41 #for $k in $input: |
42 #if $k.type_file == "scaffold": | 42 $k |
43 -s | |
44 #end if | |
45 $k.input | |
46 #end for | 43 #end for |
47 && | 44 && |
48 mv outputdir/report.txt '$quast_out' && | |
49 mv outputdir/report.tsv '$quast_tsv' && | 45 mv outputdir/report.tsv '$quast_tsv' && |
50 mv outputdir/report.tex '$quast_tex' && | 46 ## The sed string below removes non-functional "Main menu" button from the viewer |
51 mv outputdir/icarus_viewers/contig_size_viewer.html '$icarus' && | 47 sed '\:<div class="btn btn-inverse" id="to_main_menu_button">:,\:</div>:d' outputdir/icarus_viewers/contig_size_viewer.html > '$contig_size_viewer' && |
52 mv outputdir/report.html '$report_html' | 48 #if $input_ref: |
49 ## The sed string below removes non-functional "Main menu" button from the viewer | |
50 sed '\:<div class="btn btn-inverse" id="to_main_menu_button">:,\:</div>:d' outputdir/icarus_viewers/alignment_viewer.html > '$alignment_viewer' && | |
51 #end if | |
52 ## The sed string below removes non-functional link from the report page | |
53 sed "\:<div id='icarus-json'>:,\:</div>:d" outputdir/report.html > '$report_html' && | |
54 mv outputdir/report.pdf '$report_pdf' && | |
55 mv outputdir/contigs_reports/misassemblies_report.tsv '$mis_ass_tsv' && | |
56 mv outputdir/contigs_reports/unaligned_report.tsv '$unalign_tsv' && | |
57 mv outputdir/quast.log '$log_txt' | |
53 ]]> | 58 ]]> |
54 </command> | 59 </command> |
55 <inputs> | 60 <inputs> |
56 <repeat name="files" title="Input assembly files" min="1"> | 61 <param type="data" format="fasta" name="input" label="Contigs/scaffolds output file" multiple="True"/> |
57 <param type="data" format="fasta" name="input" label="Contigs/scaffolds output file"/> | 62 <param name="type_file" type="select" label="Type of data"> |
58 <param name="type_file" type="select" label="Type of data"> | 63 <option value="contig">Contig</option> |
59 <option value="contig">Contig</option> | 64 <option value="scaffold">Scaffold</option> |
60 <option value="scaffold">Scaffold</option> | 65 </param> |
61 </param> | |
62 </repeat> | |
63 <param name="input_size" type="integer" label="Size of reference genome" optional="True" argument="--est-ref-size" | 66 <param name="input_size" type="integer" label="Size of reference genome" optional="True" argument="--est-ref-size" |
64 help="Estimated reference genome size (in bp) for computing NGx statistics, if known. This value will be used only if a reference genome file is not specified "/> | 67 help="Estimated reference genome size (in bp) for computing NGx statistics, if known. This value will be used only if a reference genome file is not specified "/> |
65 <param name="input_ref" type="data" format="fasta" label="Reference File" optional="True" argument="-R" | 68 <param name="input_ref" type="data" format="fasta" label="Reference genome" optional="True" argument="-R" |
66 help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference."/> | 69 help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference."/> |
67 <param name="annot" type="data" format="txt" label="Gene Annotations" optional="True" argument="-G" | 70 <param name="annot" type="data" format="gff, gff3, bed" label="Gene Annotations" optional="True" argument="-G" |
68 help="File with gene positions in the reference genome. "/> | 71 help="Gene coordinates for the reference genome (only relevant if the reference genome is used). "/> |
69 <param name="input_operon" type="data" format="txt" label="Operon Annotations" optional="True" argument="-O" | 72 <param name="input_operon" type="data" format="gff, gff3, bed" label="Operon Annotations" optional="True" argument="-O" help="Operon coordinates for the reference genome (only relevant if the reference genome is used)."/> |
70 help="File with operon positions in the reference genome"/> | |
71 <param name="gene_selection" type="select" label="Type of organism"> | 73 <param name="gene_selection" type="select" label="Type of organism"> |
72 <option value="prokaryotes">Prokaryotes</option> | 74 <option value="prokaryotes">Prokaryotes</option> |
73 <option value="eukaryote">Eukaryote</option> | 75 <option value="eukaryote">Eukaryote</option> |
74 <option value="metagenes">Metagenomes</option> | |
75 </param> | 76 </param> |
76 <param name="min_contig" type="integer" value="500" label="Lower Threshold" argument="--min-contig" | 77 <param name="min_contig" type="integer" value="500" label="Lower Threshold" argument="--min-contig" |
77 help="Set the lower threshold for a contig length. Shorter contigs won't be taken into account (default is 500)"/> | 78 help="Set the lower threshold for a contig length. Shorter contigs won't be taken into account [default is 500]"/> |
78 <param name="threshold_contig" type="text" value="0,1000" label="Thresholds" argument="--contig-thresholds" | 79 <param name="threshold_contig" type="text" value="0,1000" label="Thresholds" argument="--contig-thresholds" |
79 help="Set the thresholds for contig length. Comma-separated list of contig length thresholds.(default is 0,1000)"/> | 80 help="Set the thresholds for contig length. Comma-separated list of contig length thresholds.[default is 0,1000]"/> |
80 </inputs> | 81 </inputs> |
81 <outputs> | 82 <outputs> |
82 <data format="txt" name="quast_out" label="Quast report.txt" from_work_dir="Quast_report.txt"/> | 83 <data format="txt" name="log_txt" label="Quast: Log" /> |
83 <data format="tsv" name="quast_tsv" label="Quast report.tsv" from_work_dir="Quast_report.tsv"/> | 84 <data format="tabular" name="mis_ass_tsv" label="Quast: Misassemblies"> |
84 <data format="tex" name="quast_tex" label="Quast report.tex" from_work_dir="Quast_report.tex"/> | 85 <filter>input_ref is not None</filter> |
85 <data format="html" name="icarus" label="Icarus Contig size viewer" from_work_dir="Icarus.html"/> | 86 </data> |
86 <data format="html" name="report_html" label="Quast report.html" from_work_dir="Quast_report.html"/> | 87 <data format="tabular" name="unalign_tsv" label="Quast: Unaligned contigs"> |
88 <filter>input_ref is not None</filter> | |
89 </data> | |
90 <data format="html" name="contig_size_viewer" label="Quast: Contig view"/> | |
91 <data format="html" name="alignment_viewer" label="Quast: Alignment view"> | |
92 <filter>input_ref is not None</filter> | |
93 </data> | |
94 <data format="tabular" name="quast_tsv" label="Quast: Report (tabulal)"/> | |
95 <data format="html" name="report_html" label="Quast: Report (HTML)"/> | |
96 <data format="pdf" name="report_pdf" label="Quast: Report (PDF)"/> | |
87 </outputs> | 97 </outputs> |
88 <tests> | 98 <tests> |
89 <test> | 99 <test> |
90 <param name="input" value="contigs_1.fasta"/> | 100 <!-- Test with reference and genes annotations --> |
91 <param name="input.type_file" value="contig"/> | 101 <param name="input" value="contigs2.fna,contigs1.fna"/> |
102 <param name="input_ref" value="reference.fna"/> | |
103 <param name="type_file" value="contig"/> | |
104 <param name="annot" value="genes.gff"/> | |
92 <param name="gene_selection" value="prokaryotes"/> | 105 <param name="gene_selection" value="prokaryotes"/> |
93 <output name="quast_tsv" file="Quast_report.tsv" lines_diff="4"/> | 106 <output name="quast_tsv" file="test1_output.tsv" lines_diff="4"/> |
94 </test> | 107 </test> |
95 <test> | 108 <test> |
96 <param name="input" value="contigs_1.fasta"/> | 109 <!-- Test without reference --> |
97 <param name="input.type_file" value="contig"/> | 110 <param name="input" value="contigs2.fna,contigs1.fna"/> |
98 <param name="input_ref" value="NC000913.3_1-6650.fasta"/> | 111 <param name="type_file" value="contig"/> |
99 <param name="gene_selection" value="prokaryotes"/> | 112 <output name="quast_tsv" file="test2_output.tsv" lines_diff="4"/> |
100 <output name="quast_tsv" file="Quast_report_withref.tsv" lines_diff="4"/> | |
101 </test> | |
102 <test> | |
103 <param name="input" value="contigs_1.fasta"/> | |
104 <param name="input.type_file" value="contig"/> | |
105 <param name="input_ref" value="NC000913.3_1-6650.fasta"/> | |
106 <param name="annot" value="NC000913.3_1-6650.gff"/> | |
107 <param name="gene_selection" value="prokaryotes"/> | |
108 <output name="quast_tsv" file="Quast_report_withref_withgenes.tsv" lines_diff="4"/> | |
109 </test> | 113 </test> |
110 </tests> | 114 </tests> |
111 <help> | 115 <help> |
112 <![CDATA[ | 116 <![CDATA[ |
113 **Description** | 117 **What it does** |
114 | 118 |
115 Galaxy tool wrapper for the QUAST tool. Quast stands for QUality ASsessment Tool. It evaluates genome assemblies by computing various metrics. | 119 QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics. |
116 | 120 |
117 ----- | 121 If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without a reference genome. If you are new to Quast, start by reading its `manual page <http://quast.bioinf.spbau.ru/manual.html>`_. |
118 | 122 |
119 **Inputs and Outputs** | 123 **Using Quast without reference** |
120 | 124 |
121 - Input: | 125 Without a reference, Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). Evaluating them without a reference will produce the following Quast outputs: |
122 + The tool accepts assemblies and references in FASTA format. | 126 |
123 + The tool accepts annotation and operon files in: | 127 * Quast report in HTML format |
124 + GFF, versions 2 and 3 (note: feature/type field should be either "gene" or "operon") | 128 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file) |
125 + the format used by NCBI for genes ("Summary (text)"); | 129 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format |
126 + four tab-separated columns: sequence name, gene/operon id, start position, end position | 130 * Quast log (a file with technical information about Quast tool execution) |
127 | 131 |
128 - Output: | 132 The **tab delimited Quast report** will contain the following information:: |
129 + An assessment summary in plain text format | 133 |
130 + An assessment summary in tabulation separated values format | 134 Assembly pit_fna cef_fna car_fna |
131 + An assessment summary in LaTeX format | 135 # contigs (>= 0 bp) 100 91 94 |
132 + An assessment summary in HTML format | 136 # contigs (>= 1000 bp) 62 58 61 |
133 + An HTML view of contig sizes with Icarus | 137 Total length (>= 0 bp) 6480635 6481216 6480271 |
134 | 138 Total length (>= 1000 bp) 6466917 6468946 6467103 |
139 # contigs 71 66 70 | |
140 Largest contig 848753 848766 662053 | |
141 Total length 6473173 6474698 6473810 | |
142 GC (%) 66.33 66.33 66.33 | |
143 N50 270269 289027 254671 | |
144 N75 136321 136321 146521 | |
145 L50 7 7 8 | |
146 L75 15 15 16 | |
147 # N's per 100 kbp 0.00 0.00 0.00 | |
148 | |
149 where values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ | |
150 | |
151 **Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only largest contigs or number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least approximate genome length is known (see ``--est-ref-size``), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using the Icarus control panel: | |
152 | |
153 .. image:: $PATH_TO_IMAGES/contig_view_noR.png | |
154 :width: 558 | |
155 :height: 412 | |
156 | |
157 Also see `Plot description <http://quast.bioinf.spbau.ru/manual.html#sec3.2>`_ section of the manual. | |
158 | |
159 **Using Quast with reference** | |
160 | |
161 Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a FASTA file containing the *P. aeruginosa* PA14 genome to the **Reference genome** input box). The following outputs will be produced (note the alignment viewer): | |
162 | |
163 * Quast report in HTML format | |
164 * `Contig viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file) | |
165 * `Alignment viewer <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_ (an HTML file) | |
166 * `Quast report <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_ in Tab-delimited format | |
167 * Summary of `misassemblies <http://quast.bioinf.spbau.ru/manual.html#sec3.1.2>`_ | |
168 * Summary of `unaligned contigs <http://quast.bioinf.spbau.ru/manual.html#sec3.1.3>`_ | |
169 * Quast log (a file with technical information about Quast tool execution) | |
170 | |
171 With the reference Quast produces a much more comprehensive set of results:: | |
172 | |
173 Assembly pit_fna cef_fna car_fna | |
174 # contigs (>= 0 bp) 100 91 94 | |
175 # contigs (>= 1000 bp) 62 58 61 | |
176 Total length (>= 0 bp) 6480635 6481216 6480271 | |
177 Total length (>= 1000 bp) 6466917 6468946 6467103 | |
178 # contigs 71 66 70 | |
179 Largest contig 848753 848766 662053 | |
180 Total length 6473173 6474698 6473810 | |
181 Reference length 6537648 6537648 6537648 | |
182 GC (%) 66.33 66.33 66.33 | |
183 Reference GC (%) 66.29 66.29 66.29 | |
184 N50 270269 289027 254671 | |
185 NG50 270269 289027 254671 | |
186 N75 136321 136321 146521 | |
187 NG75 136321 136321 136321 | |
188 L50 7 7 8 | |
189 LG50 7 7 8 | |
190 L75 15 15 16 | |
191 LG75 15 15 17 | |
192 # misassemblies 0 0 0 | |
193 # misassembled contigs 0 0 0 | |
194 Misassembled contigs length 0 0 0 | |
195 # local misassemblies 1 1 2 | |
196 # unaligned mis. contigs 0 0 0 | |
197 # unaligned contigs 0 + 0 0 + 0 0 + 0 | |
198 part part part | |
199 Unaligned length 0 0 0 | |
200 Genome fraction (%) 99.015 99.038 99.025 | |
201 Duplication ratio 1.000 1.000 1.000 | |
202 # N's per 100 kbp 0.00 0.00 0.00 | |
203 # mismatches per 100 kbp 3.82 3.63 3.49 | |
204 # indels per 100 kbp 1.19 1.13 1.13 | |
205 Largest alignment 848753 848766 662053 | |
206 Total aligned length 6473163 6474660 6473792 | |
207 NA50 270269 289027 254671 | |
208 NGA50 270269 289027 254671 | |
209 NA75 136321 136321 146521 | |
210 NGA75 136321 136321 136321 | |
211 LA50 7 7 8 | |
212 LGA50 7 7 8 | |
213 LA75 15 15 16 | |
214 LGA75 15 15 17 | |
215 | |
216 where, again, values are defined as specified in `Quast manual <http://quast.bioinf.spbau.ru/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly. | |
217 | |
218 Using reference also produces an **Alignment viewer**: | |
219 | |
220 .. image:: $PATH_TO_IMAGES/Align_view.png | |
221 :width: 515 | |
222 :height: 395 | |
223 | |
224 Alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.bioinf.spbau.ru/manual.html#sec3.4>`_). | |
135 | 225 |
136 ]]> | 226 ]]> |
137 </help> | 227 </help> |
138 | 228 |
139 <citations> | 229 <citations> |