Mercurial > repos > greg > quast
view quast.xml @ 0:5367786dc871 draft default tip
Uploaded
author | greg |
---|---|
date | Tue, 14 Mar 2023 15:21:14 +0000 |
parents | |
children |
line wrap: on
line source
<tool id="quast" name="Quast" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>Genome assembly Quality</description> <macros> <import>macros.xml</import> </macros> <expand macro="bio_tools"/> <expand macro='requirements' /> <command detect_errors="exit_code"> <![CDATA[ #import re #import os #if str($in.custom) == 'false' #set $labels = ','.join( [re.sub('[^\w\-_]', '_', str($x.element_identifier)) for $x in $in.inputs]) echo $labels && #else #set $labels = [] #for $x in $in.inputs #if str($x.labels) != '' #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.labels))) #else #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.input.element_identifier))) #end if #end for #set $labels = ','.join($labels) #end if #if $assembly.type == 'metagenome' and $assembly.ref.origin == 'list' #set $temp_ref_list_fp = 'temp_ref_list' #for $i in $assembly.ref.references_list.split(',') echo $i >> $temp_ref_list_fp && #end for #end if #if $reads.reads_option == 'paired' #for $read in $reads.input_1 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier)) ln -s '$read' 'pe1-${identifier}.${read.ext}' && #end for #for $read in $reads.input_2 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier)) ln -s '$read' 'pe2-${identifier}.${read.ext}' && #end for #else if $reads.reads_option == 'paired_collection' #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($reads.input_1.element_identifier)) ln -s '$reads.input_1.forward' 'pe1-${identifier}.${reads.input_1.forward.ext}' && ln -s '$reads.input_1.reverse' 'pe2-${identifier}.${reads.input_1.reverse.ext}' && #end if #if $assembly.type == 'genome' quast #else metaquast #end if #if $reads.reads_option == 'single' #for $read in $reads.input_1 --single '$read' #end for #else if $reads.reads_option == 'paired' #for $read in $reads.input_1 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier)) --pe1 'pe1-${identifier}.${read.ext}' #end for #for 
$read in $reads.input_2 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($read.element_identifier)) --pe2 'pe2-${identifier}.${read.ext}' #end for #else if $reads.reads_option == 'paired_collection' #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($reads.input_1.element_identifier)) --pe1 'pe1-${identifier}.${reads.input_1.forward.ext}' --pe2 'pe2-${identifier}.${reads.input_1.reverse.ext}' #else if $reads.reads_option == 'paired_interlaced' #for $read in $reads.input_1 --pe12 '$read' #end for #else if $reads.reads_option == 'mate_paired' #for $read in $reads.input_1 --mp1 '$read' #end for #for $read in $reads.input_2 --mp2 '$read' #end for #else if $reads.reads_option == 'pacbio' #for $read in $reads.input_1 --pacbio '$read' #end for #else if $reads.reads_option == 'nanopore' #for $read in $reads.input_1 --nanopore '$read' #end for #end if --labels '$labels' -o 'outputdir' #if $assembly.type == 'genome' #if $assembly.ref.use_ref == 'true' #if $assembly.ref.reference_source.reference_source_selector == 'history': -r '$assembly.ref.reference_source.r' #else: -r '$assembly.ref.reference_source.r.fields.path' #end if #if $assembly.ref.features --features '$assembly.ref.features' #end if #if $assembly.ref.operons --operons '$assembly.ref.operons' #end if $assembly.ref.circos $assembly.ref.k_mer.k_mer_stats #if str($assembly.ref.k_mer.k_mer_stats) != '' --k-mer-size $assembly.ref.k_mer.k_mer_size #end if #else if $assembly.ref.est_ref_size --est-ref-size $assembly.ref.est_ref_size #end if $assembly.orga_type #else if $assembly.type == 'metagenome' #if $assembly.ref.origin == 'history' -r '$assembly.ref.r' #else if $assembly.ref.origin == 'list' --references-list '$temp_ref_list_fp' #else if $assembly.ref.origin == 'silva' --test-no-ref --max-ref-num $assembly.ref.max_ref_num #end if $assembly.reuse_combined_alignments #end if --min-identity $assembly.min_identity --min-contig $min_contig $split_scaffolds $large #if str($genes.gene_finding.tool) != 'none' 
$genes.gene_finding.tool
    ## Gene length thresholds are only passed for the GeneMark and Glimmer predictors.
    ## The compared value must equal the select option value '--gene-finding' (hyphen, not underscore),
    ## otherwise the thresholds are silently dropped for GeneMark.
    #if $genes.gene_finding.tool == '--gene-finding' or $genes.gene_finding.tool == '--glimmer'
        ## Strip blanks around each comma-separated entry before handing the list to QUAST.
        #set $gene_threshold = ','.join([x.strip() for x in str($genes.gene_finding.gene_thresholds).split(',')])
        --gene-thresholds '$gene_threshold'
    #end if
#end if
$genes.rna_finding
$genes.conserved_genes_finding
$alignments.use_all_alignments
--min-alignment $alignments.min_alignment
--ambiguity-usage '$alignments.ambiguity_usage'
--ambiguity-score $alignments.ambiguity_score
$alignments.fragmented
$alignments.upper_bound_assembly
#if $alignments.upper_bound_min_con
    --upper-bound-min-con $alignments.upper_bound_min_con
#end if
#if $alignments.local_mis_size
    --local-mis-size $alignments.local_mis_size
#end if
## The max-indent option is only emitted when the fragmented option is set and a value was given.
#if $alignments.fragmented
    #if $advanced.fragmented_max_indent != ''
        --fragmented-max-indent $advanced.fragmented_max_indent
    #end if
#end if
## Strip blanks around each comma-separated contig threshold.
#set $contig_thresholds = ','.join([x.strip() for x in str($advanced.contig_thresholds).split(',')])
--contig-thresholds '$contig_thresholds'
$advanced.strict_NA
--extensive-mis-size $advanced.extensive_mis_size
--scaffold-gap-max-size $advanced.scaffold_gap_max_size
--unaligned-part-size $advanced.unaligned_part_size
$advanced.skip_unaligned_mis_contigs
$advanced.report_all_metrics
--x-for-Nx $advanced.x_for_Nx
## Positional arguments: the assembly files, taken either directly or from the custom-name repeat.
#if str($in.custom) == 'false'
    #for $k in $in.inputs
        '$k'
    #end for
#else
    #for $k in $in.inputs
        '$k.input'
    #end for
#end if
--threads \${GALAXY_SLOTS:-1}
## Post-processing: copy HTML reports next to the HTML output dataset.
#if $assembly.type == 'genome'
    && mkdir -p '$report_html.files_path'
    && cp outputdir/*.html '$report_html.files_path'
    ## NOTE(review): use_ref is a select with values 'true'/'false'; this test is on the raw value,
    ## unlike the explicit == 'true' comparison used earlier in the command - confirm intent.
    #if $assembly.ref.use_ref
        && cp -R outputdir/icarus_viewers '$report_html.files_path'
    #end if
#else
    ## Copy report.tsv and the HTML reports into outputdir/combined_reference/ when they exist at top level.
    && if [[ -f "outputdir/report.tsv" ]]; then mkdir -p "outputdir/combined_reference/" && cp "outputdir/report.tsv" "outputdir/combined_reference/report.tsv"; fi
    && if [[ -f "outputdir/report.html" ]]; then mkdir -p "outputdir/combined_reference/" && cp outputdir/*.html "outputdir/combined_reference/"; fi
    && mkdir -p
'$report_html_meta.files_path' && cp outputdir/combined_reference/*.html '$report_html_meta.files_path' && if [[ -d "outputdir/icarus_viewers" ]]; then cp -R outputdir/icarus_viewers 'outputdir/combined_reference/'; fi && if [[ -d "outputdir/combined_reference/icarus_viewers" ]]; then cp -R outputdir/combined_reference/icarus_viewers '$report_html_meta.files_path'; fi && if [[ -d "outputdir/krona_charts/" ]]; then mkdir -p '$krona.files_path' && cp outputdir/krona_charts/*.html '$krona.files_path'; fi #end if ]]></command> <inputs> <conditional name="in"> <param name="custom" type="select" label="Use customized names for the input files?" help="They will be used in reports, plots and logs"> <option value="true">Yes, specify custom names</option> <option value="false" selected="true">No, use dataset names</option> </param> <when value="true"> <repeat name="inputs" title="Contigs/scaffolds" min="1"> <param name="input" type="data" format="fasta" label="Contigs/scaffolds file"/> <param argument="--labels" type="text" value="" label="Name"/> </repeat> </when> <when value="false"> <param name="inputs" type="data" format="fasta" multiple="true" label="Contigs/scaffolds file"/> </when> </conditional> <conditional name="reads"> <param name="reads_option" type="select" label="Reads options" help="Currently, the supported read types are Illumina unpaired, paired-end and mate-pair reads, PacBio SMRT, and Oxford Nanopore long reads."> <option value="disabled">Disabled</option> <option value="single">Illumina single-end reads</option> <option value="paired">Illumina paired-end reads</option> <option value="paired_collection">Illumina paired-end reads in paired collection</option> <option value="paired_interlaced">Illumina interlaced paired-end reads</option> <option value="mate_paired">Illumina mate-pair reads</option> <option value="pacbio">Pacbio SMRT reads</option> <option value="nanopore">Nanopore reads</option> </param> <when value="disabled"/> <when value="single"> <param 
name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" /> </when> <when value="paired"> <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #1" /> <param name="input_2" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #2" /> </when> <when value="paired_collection"> <param name="input_1" type="data_collection" collection_type="paired" format="fastq,fastq.gz,fasta,fasta.gz" label="FASTQ/FASTA files" /> </when> <when value="paired_interlaced"> <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" /> </when> <when value="mate_paired"> <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #1" /> <param name="input_2" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file #2" /> </when> <when value="pacbio"> <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" /> </when> <when value="nanopore"> <param name="input_1" format="fastq,fastq.gz,fasta,fasta.gz" type="data" multiple="true" label="FASTQ/FASTA file" /> </when> </conditional> <conditional name="assembly"> <param name="type" type="select" label="Type of assembly"> <option value="genome">Genome</option> <option value="metagenome">Metagenome</option> </param> <when value="genome"> <conditional name="ref"> <param name="use_ref" type="select" label="Use a reference genome?" help="Many metrics can't be evaluated without a reference. 
If this is omitted, QUAST will only report the metrics that can be evaluated without a reference."> <option value="true">Yes</option> <option value="false" selected="true">No</option> </param> <when value="true"> <conditional name="reference_source"> <param name="reference_source_selector" type="select" label="Select a reference genome from your history or select a cached reference genome?"> <option value="cached">Select a cached reference genome</option> <option value="history">Select a reference genome from the history and build the index</option> </param> <when value="cached"> <param argument="-r" type="select" label="Using reference genome" help="Select genome from the list"> <options from_data_table="all_fasta"> <filter type="sort_by" column="2"/> <validator type="no_options" message="No reference genomes are available"/> </options> <validator type="no_options" message="A cached reference genome is not available for the build associated with the selected input file"/> </param> </when> <when value="history"> <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" /> </when> </conditional> <param argument="--features" type="data" format="gff, gff3, bed" optional="true" label="Genomic feature positions in the reference genome" help="Gene coordinates for the reference genome"/> <param argument="--operons" type="data" format="gff, gff3, bed" optional="true" label="Operon positions in the reference genome" help="Operon coordinates for the reference genome"/> <conditional name="k_mer"> <param argument="--k-mer-stats" type="select" label="Compute k-mer-based quality metrics?" help="It is recommended for large genomes. 
This may significantly increase memory and time consumption on large genomes">
    <option value="--k-mer-stats">Yes</option>
    <option value="" selected="true">No</option>
</param>
<!-- The k-mer size is only offered when k-mer statistics are enabled. -->
<when value="--k-mer-stats">
    <param name="k_mer_size" argument="--k-mer-size" type="integer" value="101" label="Size of k" />
</when>
<when value=""/>
</conditional>
<param argument="--circos" type="boolean" truevalue="--circos" falsevalue="" checked="false" label="Generate Circos plot" help="Plot Circos version of Icarus contig alignment viewer"/>
</when>
<!-- Without a reference, an estimated genome size may be supplied instead. -->
<when value="false">
    <param argument="--est-ref-size" type="integer" optional="true" label="Estimated reference genome size (in bp) for computing NGx statistics" help=""/>
</when>
</conditional>
<!-- Organism type: the selected value is inserted as-is into the command line; the empty value adds no flag. -->
<param name="orga_type" type="select" label="Type of organism">
    <option value="">Prokaryotes: use of GeneMarkS for gene finding (default)</option>
    <option value="--eukaryote">Eukaryote: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding (--eukaryote)</option>
    <option value="--fungus">Fungus: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding (--fungus)</option>
</param>
<expand macro="min_identity_macros" value="95"/>
</when>
<when value="metagenome">
    <conditional name="ref">
        <param name="origin" type="select" label="Reference genome" help="Many metrics can't be evaluated without a reference.
If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
            <option value="history">From history</option>
            <option value="list">From list</option>
            <option value="silva">From SILVA database</option>
            <option value="none" selected="true">None</option>
        </param>
        <when value="history">
            <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
        </when>
        <!-- The command writes each comma-separated entry of this text to a temporary references list file. -->
        <when value="list">
            <param name="references_list" argument="--references-list" type="text" value="" label="Comma-separated list of reference genomes" help="MetaQUAST will search for these references in the NCBI database and will download the found ones"/>
        </when>
        <when value="silva">
            <param name="max_ref_num" argument="--max-ref-num" type="integer" value="50" label="Maximum number of reference genomes (per each assembly) to download after searching in the SILVA database" />
        </when>
        <when value="none"/>
    </conditional>
    <param argument="--reuse-combined-alignments" type="boolean" truevalue="--reuse-combined-alignments" falsevalue="" checked="false" label="Reuse the alignments on the combined reference" help="Reuse the alignments on the combined reference in the subsequent runs per separate references. That is, the alignment procedure is performed only once (for all assemblies against the combined reference) and is NOT executed for each subgroup of contigs against the corresponding separate reference genomes. In each separate reference run, all precomputed assembly alignments for other references are simply ignored" />
    <expand macro="min_identity_macros" value="90"/>
</when>
</conditional>
<!-- Options shared by genome and metagenome mode. -->
<param argument="--min-contig" type="integer" value="500" label="Lower threshold for a contig length (in bp)" help="Shorter contigs won't be taken into account"/>
<param argument="--split-scaffolds" type="boolean" truevalue="--split-scaffolds" falsevalue="" checked="false" label="Are assemblies scaffolds rather than contigs?"
help="QUAST will add split versions of assemblies to the comparison. Assemblies are split by continuous fragments of N's of length >= 10. If broken version is equal to the original assembly (i.e. nothing was split) it is not included in the comparison."/> <param argument="--large" type="boolean" truevalue="--large" falsevalue="" checked="false" label="Is genome large (> 100 Mbp)?" help="Use optimal parameters for evaluation of large genomes. Affects speed and accuracy. In particular, imposes --eukaryote --min-contig 3000 --min-alignment 500 --extensive-mis-size 7000 (can be overridden manually with the corresponding options). In addition, this mode tries to identify misassemblies caused by transposable elements and exclude them from the number of misassemblies."/> <section name="genes" title="Genes"> <conditional name="gene_finding"> <param name="tool" type="select" label="Tool for gene prediction" help=""> <option value="none">Don't predict genes</option> <option value="--gene-finding">GeneMarkS if prokaryotes or GeneMark-ES if eukaryotes or fungi</option> <option value="--mgm">MetaGeneMark, specially for metagenomic assembly</option> <option value="--glimmer">Glimmer</option> </param> <when value="none"/> <when value="--gene-finding"> <expand macro="gene_thresholds"/> </when> <when value="--mgm"/> <when value="--glimmer"> <expand macro="gene_thresholds"/> </when> </conditional> <param argument="--rna-finding" type="boolean" truevalue="--rna-finding" falsevalue="" checked="false" label="Enables ribosomal RNA gene finding?" help="By default, we assume that the genome is prokaryotic, and Barrnap uses the bacterial database for rRNA prediction. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force Barrnap to work with the eukaryotic (fungal) database. 
"/> <param argument="--conserved-genes-finding" type="boolean" truevalue="--conserved-genes-finding" falsevalue="" checked="false" label="Enables search for Universal Single-Copy Orthologs using BUSCO?" help="By default, we assume that the genome is prokaryotic, and BUSCO uses the bacterial database of orthologs. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force BUSCO to work with the eukaryotic (fungal) database. "/> </section> <section name="alignments" title="Alignments"> <param argument="--use-all-alignments" type="boolean" truevalue="--use-all-alignments" falsevalue="" checked="false" label="Use all alignments as in QUAST v.1.*. to compute genome fraction, # genomic features, # operons metrics?" help="By default, QUAST v.2.0 and higher filters out ambiguous and redundant alignments, keeping only one alignment per contig (or one set of non-overlapping or slightly overlapping alignments)"/> <param argument="--min-alignment" type="integer" value="65" label="Minimum length of alignment" help="Alignments shorter than this value will be filtered. Note that all alignments shorter than 65 bp will be filtered regardless of this threshold."/> <param argument="--ambiguity-usage" type="select" label="How processing equally good alignments of a contig (probably repeats)?" help=""> <option value="none">Skip all such alignments</option> <option value="one" selected="true">Take only one (the very best one)</option> <option value="all">Use all alignments. It can cause a significant increase of # mismatches (repeats are almost always inexact due to accumulated SNPs, indels, etc.). It is useful for metagenomic assemblies where ambiguous alignments might represent homologous sequences of different strains</option> </param> <param argument="--ambiguity-score" type="float" value="0.99" min="0.8" max="1.0" label="Score S for defining equally good alignments of a single contig" help="All alignments are sorted by decreasing LEN × IDY% value. 
All alignments with LEN × IDY% less than S × best(LEN × IDY%) are discarded. "/> <param argument="--fragmented" type="boolean" truevalue="--fragmented" falsevalue="" checked="false" label="Fragmented reference genome" help="Reference genome is fragmented (e.g. a scaffold reference). QUAST will try to detect misassemblies caused by the fragmentation and mark them fake (will be excluded from misassemblies). Note: QUAST will not detect misassemblies caused by the linear representation of circular genome "/> <param argument="--upper-bound-assembly" type="boolean" truevalue="--upper-bound-assembly" falsevalue="" label="Simulate upper bound assembly" help="Simulate upper bound assembly based on the reference genome and a given set reads (mate-pairs or long reads, such as Pacbio SMRT/Oxford Nanopore, are REQUIRED). This assembly is added to the comparison and could be useful for estimating the upper bounds of completeness and contiguity that theoretically can be reached by assembly software from this particular set of reads. The concept is based on the fact that the reference genome cannot be completely reconstructed from raw reads due to long genomic repeats and low covered regions." /> <param argument="--upper-bound-min-con" type="integer" value="" optional="true" label="Minimal number of 'connecting reads' needed for joining upper bound contigs into a scaffold" help="This is important for a realistic estimation of genome assembly fragmentation due to long repeats. The default values is 2 for mate-pairs and 1 for long reads (PacBio or Nanopore libraries)"/> <param argument="--local-mis-size" type="integer" value="200" optional="true" label="Minimal local misassembly size" help="Lower threshold for the local misassembly size. Shorter inconsistencies are considered as (long) indels. The default value is 200 bp. 
Note that the threshold should be equal to or lower than minimal extensive misassembly size, which is 1000 bp by default"/> </section> <section name="advanced" title="Advanced options"> <param argument="--contig-thresholds" type="text" value="0,1000" label="Comma-separated list of contig length thresholds (in bp)" help="Used in # contigs ≥ x and total length (≥ x) metrics"/> <param argument="--strict-NA" type="boolean" truevalue="--strict-NA" falsevalue="" checked="false" label="Break contigs at every misassembly event (including local ones) to compute NAx and NGAx statistics?" help="By default, QUAST breaks contigs only at extensive misassemblies (not local ones)."/> <param argument="--extensive-mis-size" type="integer" value="1000" min="85" label="Lower threshold for the relocation size (gap or overlap size between left and right flanking sequence)" help="Shorter relocations are considered as local misassemblies. It does not affect other types of extensive misassemblies (inversions and translocations). The default value is 1000 bp. Note that the threshold should be greater than maximum indel length which is 85 bp."/> <param argument="--scaffold-gap-max-size" type="integer" value="1000" label="Max allowed scaffold gap length difference for detecting corresponding type of misassemblies" help="Longer inconsistencies are considered as relocations and thus, counted as extensive misassemblies. The default value is 10000 bp. Note that the threshold make sense only if it is greater than extensive misassembly size"/> <param argument="--unaligned-part-size" type="integer" value="500" label="Lower threshold for detecting partially unaligned contigs" help=""/> <param argument="--skip-unaligned-mis-contigs" type="boolean" truevalue="" falsevalue="--skip-unaligned-mis-contigs" checked="true" label="Distinguish contigs with more than 50% unaligned bases as a separate group of contigs?" 
help="By default, QUAST does not count misassemblies in contigs with more than 50% unaligned bases. Answer No to stop treating such contigs as a separate group."/>
<!-- The parameter above is inverted: checked (the default) adds no flag, unchecked passes the skip-unaligned-mis-contigs flag. -->
<!-- Only forwarded to QUAST when the fragmented option in the Alignments section is enabled. -->
<param argument="--fragmented-max-indent" type="integer" min="0" value="" optional="true" label="Fragment max indent" help="Mark translocation as fake if both alignments are located no further than N bases from the ends of the reference fragments. The value should be less than extensive misassembly size. Default value is 50. Note: requires --fragmented option" />
<param argument="--report-all-metrics" type="boolean" truevalue="--report-all-metrics" falsevalue="" checked="false" label="Report all metrics" help="Keep all quality metrics in the main report. Usually, all not-relevant metrics are not included in the report, e.g., reference-based metrics in the no-reference mode. Also, if metric values are undefined for all input assemblies, the metric is removed from the report" />
<param argument="--x-for-Nx" type="integer" min="0" max="100" value="90" label="Report Nx, Lx, etc metrics for specific value of 'x'" help="Value of 'x' for Nx, Lx, NGx, NGAx, etc metrics reported in addition to N50, L50, NG50, NGA50, etc" />
</section>
<!-- Selection of result datasets; the output filters below test membership in this list. -->
<param name="output_files" type="select" display="checkboxes" optional="true" multiple="true" label="Output files">
    <option value="html" selected="true">HTML reports</option>
    <option value="pdf">PDF reports</option>
    <option value="tabular">Tabular reports</option>
    <option value="log">Log file</option>
    <option value="summary">Key metric summary (metagenome mode)</option>
    <option value="krona">Krona charts (metagenome mode without reference genomes)</option>
</param>
</inputs>
<outputs>
    <!-- Genome mode: main tabular report. -->
    <data name="report_tabular" format="tabular" label="${tool.name} on ${on_string}: tabular report" from_work_dir="outputdir/report.tsv">
        <filter>assembly['type'] == 'genome' and 'tabular' in output_files</filter>
    </data>
    <!-- Metagenome mode: tabular report for the combined reference. -->
    <data name="report_tabular_meta" format="tabular" label="${tool.name} on ${on_string}: tabular report for combined reference genome"
from_work_dir="outputdir/combined_reference/report.tsv"> <filter>assembly['type'] == 'metagenome' and 'tabular' in output_files</filter> </data> <data name="report_html" format="html" label="${tool.name} on ${on_string}: HTML report" from_work_dir="outputdir/report.html"> <filter>assembly['type'] == 'genome' and 'html' in output_files</filter> </data> <data name="report_html_meta" format="html" label="${tool.name} on ${on_string}: HTML report for combined reference genome" from_work_dir="outputdir/combined_reference/report.html"> <filter>assembly['type'] == 'metagenome' and 'html' in output_files</filter> </data> <data name="report_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF report" from_work_dir="outputdir/report.pdf"> <filter>assembly['type'] == 'genome' and 'pdf' in output_files</filter> </data> <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outputdir/quast.log"> <filter>assembly['type'] == 'genome' and 'log' in output_files</filter> </data> <data name="log_meta" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outputdir/metaquast.log"> <filter>assembly['type'] == 'metagenome' and 'log' in output_files</filter> </data> <data name="mis_ass" format="tabular" label="${tool.name} on ${on_string}: Misassemblies report" from_work_dir="outputdir/contigs_reports/misassemblies_report.txt"> <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter> <filter>'tabular' in output_files</filter> </data> <data name="unalign" format="tabular" label="${tool.name} on ${on_string}: Unaligned contigs report" from_work_dir="outputdir/contigs_reports/unaligned_report.tsv"> <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter> <filter>'tabular' in output_files</filter> </data> <data name="kmers" format="tabular" label="${tool.name} on ${on_string}: K-mer-based metrics report" from_work_dir="outputdir/k_mer_stats/kmers_report.txt"> 
<filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['k_mer']['k_mer_stats'] != ''</filter> <filter>'tabular' in output_files</filter> </data> <data name="circos_output" format="png" from_work_dir="outputdir/circos/circos.png" label="${tool.name} on ${on_string}: Circos plot"> <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['circos']</filter> </data> <collection name="metrics_tabular" type="list" label="${tool.name} on ${on_string}: Tabular reports for key metrics" > <discover_datasets pattern="(?P<designation>.+).tsv" directory="outputdir/summary/TSV/" format="tabular"/> <filter>assembly['type'] == 'metagenome' and 'summary' in output_files</filter> </collection> <collection name="metrics_pdf" type="list" label="${tool.name} on ${on_string}: PDF reports for key metrics" > <discover_datasets pattern="(?P<designation>.+).pdf" directory="outputdir/summary/PDF/" format="pdf"/> <filter>assembly['type'] == 'metagenome' and 'summary' in output_files</filter> </collection> <data name="krona" format="html" label="${tool.name} on ${on_string}: Krona chart" from_work_dir="outputdir/krona_charts/*.html"> <filter>assembly['type'] == 'metagenome' and assembly['ref']['origin'] == 'none' and 'krona' in output_files</filter> </data> </outputs> <tests> <!-- Test 01: reference, genes annotations and operon coordinates --> <test expect_num_outputs="2"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="true"/> <conditional name="reference_source"> <param name="reference_source_selector" value="history"/> </conditional> 
<param name="r" value="reference.fna"/> <param name="features" value="genes.gff"/> <param name="operons" value="operons.bed"/> <conditional name="k_mer"> <param name="k_mer_stats" value="--k-mer-stats"/> <param name="k_mer_size" value="101" /> </conditional> <param name="circos" value="true"/> </conditional> <param name="orga_type" value=""/> </conditional> <param name="min_contig" value="500"/> <param name="split_scaffolds" value="false"/> <section name="genes"> <conditional name="gene_finding"> <param name="tool" value="--gene_finding"/> <param name="gene_thresholds" value="0,300,1500,3000"/> </conditional> <param name="rna_finding" value="true"/> <param name="conserved_genes_finding" value="true"/> <param name="min_identity" value="95.0"/> </section> <section name="alignments"> <param name="use_all_alignments" value="true"/> <param name="min_alignment" value="65"/> <param name="ambiguity_usage" value="one"/> <param name="ambiguity_score" value="0.99"/> </section> <section name="advanced"> <param name="contig_thresholds" value="0,1000"/> <param name="strict_NA" value="true"/> <param name="extensive_mis_size" value="1000"/> <param name="scaffold_gap_max_size" value="1000"/> <param name="unaligned_part_size" value="500"/> <param name="skip_unaligned_mis_contigs" value="true"/> <param name="fragmented_max_indent" value="50"/> </section> <param name="output_files" value="html"/> <output name="report_html" file="test1_report.html" ftype="html" compare="sim_size"/> <output name="circos_output" file="test1_circos.png" ftype="png" compare="sim_size"/> </test> <!-- Test 02: all outputs --> <test expect_num_outputs="8"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <conditional name="assembly"> <param name="type" 
value="genome"/> <conditional name="ref"> <param name="use_ref" value="true"/> <conditional name="reference_source"> <param name="reference_source_selector" value="cached"/> </conditional> <param name="r" value="reference"/> <param name="features" value="genes.gff"/> <param name="operons" value="operons.bed"/> <conditional name="k_mer"> <param name="k_mer_stats" value="--k-mer-stats"/> <param name="k_mer_size" value="101" /> </conditional> <param name="circos" value="true"/> </conditional> </conditional> <param name="output_files" value="html,pdf,tabular,log"/> <output name="report_html" file="test2_report.html" ftype="html" compare="sim_size"/> <output name="report_pdf" file="test2_report.pdf" ftype="pdf" compare="sim_size"/> <output name="report_tabular" file="test2_report.tab" ftype="tabular"/> <output name="log" file="test2.log" ftype="txt" compare="sim_size"/> <output name="mis_ass" file="test2_missasemblies.tab" ftype="tabular"/> <output name="unalign" file="test2_unaligned.tab" ftype="tabular"/> <output name="kmers" file="test2_kmers.tab" ftype="tabular"/> <output name="circos_output" file="test2_circos.png" ftype="png" compare="sim_size"/> </test> <!-- Test 03: without reference --> <test expect_num_outputs="3"> <conditional name="in"> <param name="custom" value="false"/> <param name="inputs" value="contigs1.fna,contigs2.fna"/> </conditional> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="false"/> </conditional> <param name="orga_type" value="--eukaryote"/> <param name="min_identity" value="95.0"/> </conditional> <param name="min_contig" value="500"/> <param name="split_scaffolds" value="false"/> <param name="large" value="false"/> <section name="genes"> <conditional name="gene_finding"> <param name="tool" value="none"/> </conditional> <param name="rna_finding" value="false"/> <param name="conserved_genes_finding" value="false"/> </section> <section name="alignments"> <param 
name="use_all_alignments" value="false"/> <param name="min_alignment" value="65"/> <param name="ambiguity_usage" value="one"/> <param name="ambiguity_score" value="0.99"/> <param name="fragmented" value="false"/> </section> <section name="advanced"> <param name="contig_thresholds" value="0,1000, 500"/> <param name="strict_NA" value="false"/> <param name="extensive_mis_size" value="1000"/> <param name="scaffold_gap_max_size" value="1000"/> <param name="unaligned_part_size" value="500"/> <param name="skip_unaligned_mis_contigs" value="-"/> </section> <param name="output_files" value="html,pdf,log" /> <output name="log" file="test3.log" ftype="txt" compare="sim_size"/> <output name="report_html" file="test3_report.html" compare="sim_size"/> <output name="report_pdf" file="test3_report.pdf" compare="sim_size"/> </test> <!-- Test 04: metagenomics --> <test expect_num_outputs="3"> <conditional name="in"> <param name="custom" value="false"/> <param name="inputs" value="contigs3.fasta"/> </conditional> <conditional name="assembly"> <param name="type" value="metagenome"/> <conditional name="ref"> <param name="origin" value="none"/> </conditional> </conditional> <param name="min_contig" value="500"/> <param name="split_scaffolds" value="false"/> <param name="large" value="false"/> <section name="genes"> <conditional name="gene_finding"> <param name="tool" value="--mgm"/> </conditional> <param name="rna_finding" value="false"/> <param name="conserved_genes_finding" value="false"/> </section> <section name="alignments"> <param name="use_all_alignments" value="false"/> <param name="min_alignment" value="65"/> <param name="min_identity" value="95.0"/> <param name="ambiguity_usage" value="one"/> <param name="ambiguity_score" value="0.99"/> <param name="fragmented" value="false"/> </section> <section name="advanced"> <param name="contig_thresholds" value="0,1000, 500"/> <param name="strict_NA" value="false"/> <param name="extensive_mis_size" value="1000"/> <param 
name="scaffold_gap_max_size" value="1000"/> <param name="unaligned_part_size" value="500"/> <param name="skip_unaligned_mis_contigs" value="-"/> </section> <param name="output_files" value="log,html,tabular"/> <output name="log_meta" ftype="txt"> <assert_contents> <has_text text="Reference genomes are not found" /> </assert_contents> </output> <output name="report_tabular_meta" ftype="tabular"> <assert_contents> <has_text text="# contigs (>= 0 bp)"/> <has_text text="contigs3_fasta"/> <has_text text="# N's per 100 kbp"/> <has_n_lines n="17"/> </assert_contents> </output> <output name="report_html_meta" ftype="html"> <assert_contents> <has_text text="Quality Assessment Tool for Genome Assemblies" /> <has_text text="contigs3_fasta" /> <has_text text="Statistics without reference" /> </assert_contents> </output> </test> <!-- Test 05: FASTQ read files --> <test expect_num_outputs="3"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <conditional name="reads"> <param name="reads_option" value="pacbio"/> <param name="input_1" value="pacbio_01.fastq,pacbio_02.fastq,pacbio_03.fastq,pacbio_04.fastq"/> </conditional> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="true"/> <conditional name="reference_source"> <param name="reference_source_selector" value="history"/> </conditional> <param name="r" value="reference.fna"/> </conditional> </conditional> <section name="alignments"> <param name="upper_bound_assembly" value="true"/> <param name="upper_bound_min_con" value="1"/> </section> <param name="output_files" value="tabular"/> <output name="report_tabular" file="test5.tab" ftype="tabular"/> <output name="mis_ass" ftype="tabular"> <assert_contents> 
<has_text text="All statistics are based on contigs of size >= 500 bp"/> <has_text text="# scaffold misassemblies"/> <has_text text="contig1"/> </assert_contents> </output> <output name="unalign" ftype="tabular"> <assert_contents> <has_text text="Fully unaligned length"/> <has_text text="contig1"/> </assert_contents> </output> </test> <!-- Test 06: FASTQ.gz read files --> <test expect_num_outputs="1"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <conditional name="reads"> <param name="reads_option" value="single"/> <param name="input_1" value="pacbio_01.fastq.gz,pacbio_02.fastq.gz"/> </conditional> <param name="output_files" value="tabular"/> <output name="report_tabular" file="test6.tab" ftype="tabular"/> </test> <!-- Test 07: FASTA.gz read files --> <test expect_num_outputs="1"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <conditional name="reads"> <param name="reads_option" value="single"/> <param name="input_1" value="pacbio_01.fasta.gz,pacbio_02.fasta.gz"/> </conditional> <param name="output_files" value="tabular"/> <output name="report_tabular" file="test7.tab" ftype="tabular"/> </test> <!-- Test 08: metagenomics all tab outputs--> <test expect_num_outputs="3"> <conditional name="in"> <repeat name="inputs"> <param name="input" value="meta_contigs_1.fasta"/> <param name="labels" value="meta_contigs_1"/> </repeat> <repeat name="inputs"> <param name="input" value="meta_contigs_2.fasta"/> <param name="labels" value="meta_contigs_2"/> </repeat> 
</conditional> <conditional name="assembly"> <param name="type" value="metagenome"/> <conditional name="ref"> <param name="origin" value="history"/> <param name="r" value="meta_ref_1.fasta,meta_ref_2.fasta,meta_ref_3.fasta"/> </conditional> <param name="min_identity" value="95.0"/> </conditional> <param name="min_contig" value="500"/> <param name="split_scaffolds" value="false"/> <param name="large" value="false"/> <section name="genes"> <conditional name="gene_finding"> <param name="tool" value="none"/> </conditional> <param name="rna_finding" value="false"/> <param name="conserved_genes_finding" value="false"/> </section> <section name="alignments"> <param name="use_all_alignments" value="false"/> <param name="min_alignment" value="65"/> <param name="ambiguity_usage" value="one"/> <param name="ambiguity_score" value="0.99"/> <param name="fragmented" value="false"/> </section> <section name="advanced"> <param name="contig_thresholds" value="0,1000"/> <param name="strict_NA" value="false"/> <param name="extensive_mis_size" value="1000"/> <param name="scaffold_gap_max_size" value="1000"/> <param name="unaligned_part_size" value="500"/> <param name="skip_unaligned_mis_contigs" value="-"/> </section> <param name="output_files" value="tabular,summary"/> <output name="report_tabular_meta" ftype="tabular"> <assert_contents> <has_text text="# contigs (>= 0 bp)"/> <has_text text="meta_ref_3_fasta"/> <has_text text="# N's per 100 kbp"/> <has_n_lines n="34"/> </assert_contents> </output> <output_collection name="metrics_tabular" type="list" count="15"/> <output_collection name="metrics_pdf" type="list" count="16"/> </test> <!-- Test 09: metagenomics log, html and krona outputs--> <test expect_num_outputs="2"> <conditional name="in"> <repeat name="inputs"> <param name="input" value="meta_contigs_1.fasta"/> <param name="labels" value="meta_contigs_1"/> </repeat> <repeat name="inputs"> <param name="input" value="meta_contigs_2.fasta"/> <param name="labels" 
value="meta_contigs_2"/> </repeat> </conditional> <conditional name="assembly"> <param name="type" value="metagenome"/> <conditional name="ref"> <param name="origin" value="list"/> <param name="references_list" value="Lactobacillus_delbrueckii_bulgaricus,Lactobacillus_reuteri"/> </conditional> <param name="min_identity" value="95.0"/> </conditional> <param name="min_contig" value="500"/> <param name="split_scaffolds" value="false"/> <param name="large" value="false"/> <section name="genes"> <conditional name="gene_finding"> <param name="tool" value="none"/> </conditional> <param name="rna_finding" value="false"/> <param name="conserved_genes_finding" value="false"/> </section> <section name="alignments"> <param name="use_all_alignments" value="false"/> <param name="min_alignment" value="65"/> <param name="ambiguity_usage" value="all"/> <param name="ambiguity_score" value="0.99"/> <param name="fragmented" value="false"/> </section> <section name="advanced"> <param name="contig_thresholds" value="0,1000"/> <param name="strict_NA" value="false"/> <param name="extensive_mis_size" value="1000"/> <param name="scaffold_gap_max_size" value="1000"/> <param name="unaligned_part_size" value="500"/> <param name="skip_unaligned_mis_contigs" value="-"/> </section> <param name="output_files" value="html,log"/> <output name="report_html_meta" ftype="html"> <assert_contents> <has_text text="meta_contigs_2_fasta" /> <has_text text="combined_reference" /> <has_text text="Lactobacillus" /> </assert_contents> </output> <output name="log_meta" ftype="txt"> <assert_contents> <has_text text="List of references was provided, starting to download reference genomes from NCBI" /> <has_text text="Lactobacillus_delbrueckii_bulgaricus" /> </assert_contents> </output> </test> <!-- Test 10: Test new options --> <test expect_num_outputs="1"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" 
value="contig1"/> </repeat> <repeat name="inputs"> <param name="input" value="contigs2.fna"/> <param name="labels" value="contig2"/> </repeat> </conditional> <section name="alignments"> <param name="local_mis_size" value="210"/> </section> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="false"/> </conditional> </conditional> <section name="advanced"> <param name="report_all_metrics" value="true"/> <param name="x_for_Nx" value="80"/> </section> <param name="output_files" value="tabular"/> <output name="report_tabular" file="test10_tabular_report.tab" ftype="tabular"/> </test> <!-- Test 11: Test paired fastq.gz inputs --> <test expect_num_outputs="1"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> </conditional> <conditional name="reads"> <param name="reads_option" value="paired"/> <param name="input_1" value="reads1.fastq.gz" ftype="fastqsanger.gz"/> <param name="input_2" value="reads2.fastq.gz" ftype="fastqsanger.gz"/> </conditional> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="false"/> </conditional> </conditional> <param name="output_files" value="tabular"/> <output name="report_tabular" ftype="tabular"> <assert_contents> <has_text text="# contigs (>= 0 bp)"/> <has_text text="contig1"/> <has_text text="# N's per 100 kbp"/> <has_n_lines n="22"/> </assert_contents> </output> </test> <!-- Test 12: Test paired-collection fastq.gz inputs --> <test expect_num_outputs="1"> <conditional name="in"> <param name="custom" value="true"/> <repeat name="inputs"> <param name="input" value="contigs1.fna"/> <param name="labels" value="contig1"/> </repeat> </conditional> <conditional name="reads"> <param name="reads_option" value="paired_collection"/> <param name="input_1"> <collection type="paired"> <element 
name="forward" value="reads1.fastq.gz" ftype="fastqsanger.gz"/> <element name="reverse" value="reads2.fastq.gz" ftype="fastqsanger.gz"/> </collection> </param> </conditional> <conditional name="assembly"> <param name="type" value="genome"/> <conditional name="ref"> <param name="use_ref" value="false"/> </conditional> </conditional> <param name="output_files" value="tabular"/> <output name="report_tabular" ftype="tabular"> <assert_contents> <has_text text="# contigs (>= 0 bp)"/> <has_text text="contig1"/> <has_text text="# N's per 100 kbp"/> <has_n_lines n="15"/> </assert_contents> </output> </test> </tests> <help> <![CDATA[ **What it does** QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics. If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without a reference genome. If you are new to Quast, start by reading its `manual page <http://quast.sourceforge.net/docs/manual.html>`_. **Using Quast without reference** Without a reference, Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). 
Evaluating them without reference will produce the following Quast outputs: * Quast report in HTML format * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file) * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format * Quast log (a file with technical information about Quast tool execution) The **tab delimited Quast report** will contain the following information:: Assembly pit_fna cef_fna car_fna # contigs (>= 0 bp) 100 91 94 # contigs (>= 1000 bp) 62 58 61 Total length (>= 0 bp) 6480635 6481216 6480271 Total length (>= 1000 bp) 6466917 6468946 6467103 # contigs 71 66 70 Largest contig 848753 848766 662053 Total length 6473173 6474698 6473810 GC (%) 66.33 66.33 66.33 N50 270269 289027 254671 N75 136321 136321 146521 L50 7 7 8 L75 15 15 16 # N's per 100 kbp 0.00 0.00 0.00 where values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ **Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only largest contigs or number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least approximate genome length is known (see `--est-ref-size`), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using Icarus control panel: .. image:: $PATH_TO_IMAGES/contig_view_noR.png :width: 558 :height: 412 Also see `Plot description <http://quast.sourceforge.net/docs/manual.html#sec2>`_ section of the manual. **Using Quast with reference** Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a Fasta file containing *P. aeruginosa* pa14 genome to **Reference genome** input box). 
The following outputs will be produced (note the alignment viewer): * Quast report in HTML format * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file) * `Alignment viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file) * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format * Summary of `misassemblies <http://quast.sourceforge.net/docs/manual.html#sec3.1.2>`_ * Summary of `unaligned contigs <http://quast.sourceforge.net/docs/manual.html#sec3.1.3>`_ * Quast log (a file with technical information about Quast tool execution) With the reference Quast produces a much more comprehensive set of results:: Assembly pit_fna cef_fna car_fna # contigs (>= 0 bp) 100 91 94 # contigs (>= 1000 bp) 62 58 61 Total length (>= 0 bp) 6480635 6481216 6480271 Total length (>= 1000 bp) 6466917 6468946 6467103 # contigs 71 66 70 Largest contig 848753 848766 662053 Total length 6473173 6474698 6473810 Reference length 6537648 6537648 6537648 GC (%) 66.33 66.33 66.33 Reference GC (%) 66.29 66.29 66.29 N50 270269 289027 254671 NG50 270269 289027 254671 N75 136321 136321 146521 NG75 136321 136321 136321 L50 7 7 8 LG50 7 7 8 L75 15 15 16 LG75 15 15 17 # misassemblies 0 0 0 # misassembled contigs 0 0 0 Misassembled contigs length 0 0 0 # local misassemblies 1 1 2 # unaligned mis. 
contigs 0 0 0 # unaligned contigs 0 + 0 0 + 0 0 + 0 part part part Unaligned length 0 0 0 Genome fraction (%) 99.015 99.038 99.025 Duplication ratio 1.000 1.000 1.000 # N's per 100 kbp 0.00 0.00 0.00 # mismatches per 100 kbp 3.82 3.63 3.49 # indels per 100 kbp 1.19 1.13 1.13 Largest alignment 848753 848766 662053 Total aligned length 6473163 6474660 6473792 NA50 270269 289027 254671 NGA50 270269 289027 254671 NA75 136321 136321 146521 NGA75 136321 136321 136321 LA50 7 7 8 LGA50 7 7 8 LA75 15 15 16 LGA75 15 15 17 where, again, values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly. Using reference also produces an **Alignment viewer**: .. image:: $PATH_TO_IMAGES/Align_view.png :width: 515 :height: 395 Alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_). ]]> </help> <expand macro="citations"/> </tool>