<!-- Mercurial > repos > iuc > quast -->

<tool id="quast" name="Quast" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>Genome assembly Quality</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro='requirements' />
    <!-- Cheetah template rendering the quast (genome) or metaquast (metagenome) command line,
         followed by the shell steps that copy the HTML reports next to the report_html dataset. -->
    <command detect_errors="exit_code">
    <![CDATA[
#import re

## Labels: one per assembly, every character outside [A-Za-z0-9_-] replaced by '_',
## joined with commas for --labels.
#if str($in.custom) == 'false'
    #set $labels = ','.join( [re.sub('[^\w\-_]', '_', str($x.element_identifier)) for $x in $in.inputs])
    echo $labels &&
#else
    #set $labels = []
    #for $x in $in.inputs
        #if str($x.labels) != ''
            #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.labels)))
        #else
            #silent $labels.append(re.sub('[^\w\-_]', '_', str($x.input.element_identifier)))
        #end if
    #end for
    #set $labels = ','.join($labels)
#end if

## Reference names for metaquast --references-list, one name per line.
## The file is written by the job's shell instead of by open() at template
## rendering time, so it is created in the directory the command runs in.
## Empty names (e.g. from a trailing comma) are skipped.
#if $assembly.type == 'metagenome' and $assembly.ref.origin == 'list'
    #set $temp_ref_list_fp = 'temp_ref_list_fp'
    #set $ref_names = [x.strip() for x in str($assembly.ref.references_list).split(',')]
    touch '$temp_ref_list_fp' &&
    #for $ref_name in $ref_names
        #if $ref_name != ''
            echo '$ref_name' >> '$temp_ref_list_fp' &&
        #end if
    #end for
#end if

#if $assembly.type == 'genome'
    quast
#else
    metaquast
#end if

## Read libraries: each selected dataset is passed with the flag of its read type.
#if $reads.reads_option == 'single'
    #for $read in $reads.input_1
        --single '$read'
    #end for
#else if $reads.reads_option == 'paired'
    #for $read in $reads.input_1
        --pe1 '$read'
    #end for
    #for $read in $reads.input_2
        --pe2 '$read'
    #end for
#else if $reads.reads_option == 'paired_interlaced'
    #for $read in $reads.input_1
        --pe12 '$read'
    #end for
#else if $reads.reads_option == 'mate_paired'
    #for $read in $reads.input_1
        --mp1 '$read'
    #end for
    #for $read in $reads.input_2
        --mp2 '$read'
    #end for
#else if $reads.reads_option == 'pacbio'
    #for $read in $reads.input_1
        --pacbio '$read'
    #end for
#else if $reads.reads_option == 'nanopore'
    #for $read in $reads.input_1
        --nanopore '$read'
    #end for
#end if

--labels $labels
-o outputdir

## Reference handling differs between the genome and the metagenome conditional.
#if $assembly.type == 'genome'
    #if $assembly.ref.use_ref == 'true'
    -r '$assembly.ref.r'
        #if $assembly.ref.features
            --features '$assembly.ref.features'
        #end if
        #if $assembly.ref.operons
            --operons '$assembly.ref.operons'
        #end if
        $assembly.ref.circos
        $assembly.ref.k_mer.k_mer_stats
        #if str($assembly.ref.k_mer.k_mer_stats) != ''
            --k-mer-size $assembly.ref.k_mer.k_mer_size
        #end if
    #else if $assembly.ref.est_ref_size
        --est-ref-size $assembly.ref.est_ref_size
    #end if
    $assembly.orga_type
#else
    #if $assembly.ref.origin == 'history'
        -r '$assembly.ref.r'
    #else if $assembly.ref.origin == 'list'
        --references-list '$temp_ref_list_fp'
    #else if $assembly.ref.origin == 'silva'
        ## NOTE(review): --test-no-ref is documented by MetaQUAST as a test-data mode; confirm it is intended here.
        --test-no-ref
        --max-ref-num '$assembly.ref.max_ref_num'
    #end if
#end if

--min-contig $min_contig
$split_scaffolds
$large

## The select value is the flag itself; thresholds only exist for the
## '--gene-finding' and '--glimmer' cases of the conditional.
#if str($genes.gene_finding.tool) != 'none'
    $genes.gene_finding.tool
    #if $genes.gene_finding.tool == '--gene-finding' or $genes.gene_finding.tool == '--glimmer'
        #set $gene_threshold = ','.join([x.strip() for x in str($genes.gene_finding.gene_thresholds).split(',')])
        --gene-thresholds '$gene_threshold'
    #end if
#end if

$genes.rna_finding
$genes.conserved_genes_finding
$alignments.use_all_alignments
--min-alignment $alignments.min_alignment
--min-identity $alignments.min_identity
--ambiguity-usage '$alignments.ambiguity_usage'
--ambiguity-score $alignments.ambiguity_score
$alignments.fragmented
$alignments.upper_bound_assembly
#if $alignments.upper_bound_min_con
    --upper-bound-min-con $alignments.upper_bound_min_con
#end if

#if $alignments.fragmented
    #if $advanced.fragmented_max_indent != ''
        --fragmented-max-indent $advanced.fragmented_max_indent
    #end if
#end if

## Whitespace around the comma-separated thresholds is stripped before quoting.
#set $contig_thresholds = ','.join([x.strip() for x in str($advanced.contig_thresholds).split(',')])
--contig-thresholds '$contig_thresholds'
$advanced.strict_NA
--extensive-mis-size $advanced.extensive_mis_size
--scaffold-gap-max-size $advanced.scaffold_gap_max_size
--unaligned-part-size $advanced.unaligned_part_size
$advanced.skip_unaligned_mis_contigs

## Assemblies are positional arguments, in the same order as the labels above.
#if str($in.custom) == 'false'
    #for $k in $in.inputs
        '$k'
    #end for
#else
    #for $k in $in.inputs
        '$k.input'
    #end for
#end if

--threads \${GALAXY_SLOTS:-1}

&& mkdir -p '$report_html.files_path'
&& cp outputdir/*.html '$report_html.files_path'

## NOTE(review): use_ref is a select with values 'true'/'false'; the check below
## tests its truthiness rather than comparing to 'true' - confirm this is intended.
#if ($assembly.type == 'genome' and $assembly.ref.use_ref) or ($assembly.type == 'metagenome' and $assembly.ref.origin != 'none')
    && cp -R outputdir/icarus_viewers '$report_html.files_path'
#end if

    ]]>
    </command>
    <inputs>
        <!-- Assemblies to evaluate. "custom" switches between a repeat of (dataset, name) pairs
             and a plain multi-dataset selection; the command template reads both variants
             through the shared name "inputs". -->
        <conditional name="in">
            <param name="custom" type="select" label="Use customized names for the input files?" help="They will be used in reports, plots and logs">
                <option value="true">Yes, specify custom names</option>
                <option value="false" selected="true">No, use dataset names</option>
            </param>
            <when value="true">
                <!-- At least one entry is required; an empty Name falls back to the dataset identifier in the command template. -->
                <repeat name="inputs" title="Contigs/scaffolds" min="1">
                    <param name="input" type="data" format="fasta" label="Contigs/scaffolds file"/>
                    <param argument="--labels" type="text" value="" label="Name"/>
                </repeat>
            </when>
            <when value="false">
                <param name="inputs" type="data" format="fasta" multiple="true" label="Contigs/scaffolds file"/>
            </when>
        </conditional>
        <!-- Optional read libraries. Every read type exposes "input_1"; the paired-end and
             mate-pair layouts additionally expose "input_2" for the second file of each pair. -->
        <conditional name="reads">
            <param name="reads_option" type="select" label="Reads options" help="Currently, the supported read types are Illumina unpaired, paired-end and mate-pair reads, PacBio SMRT, and Oxford Nanopore long reads.">
                <option value="disabled">Disabled</option>
                <option value="single">Illumina single-end reads</option>
                <option value="paired">Illumina paired-end reads</option>
                <option value="paired_interlaced">Illumina interlaced paired-end reads</option>
                <option value="mate_paired">Illumina mate-pair reads</option>
                <option value="pacbio">Pacbio SMRT reads</option>
                <option value="nanopore">Nanopore reads</option>
            </param>
            <!-- No reads: nothing to configure. -->
            <when value="disabled"/>

            <when value="single">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file" />
            </when>

            <when value="paired">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file #1" />
                <param name="input_2" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file #2" />
            </when>

            <when value="paired_interlaced">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file" />
            </when>

            <when value="mate_paired">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file #1" />
                <param name="input_2" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file #2" />
            </when>

            <when value="pacbio">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file" />
            </when>

            <when value="nanopore">
                <param name="input_1" format="fastq,fastq.gz" type="data" multiple="true" label="FASTQ file" />
            </when>
        </conditional>

        <!-- Assembly type. "genome" runs quast and "metagenome" runs metaquast in the command
             template; each case carries its own nested "ref" conditional describing where the
             reference (if any) comes from. -->
        <conditional name="assembly">
            <param name="type" type="select" label="Type of assembly">
                <option value="genome">Genome</option>
                <option value="metagenome">Metagenome</option>
            </param>
            <when value="genome">
                <conditional name="ref">
                    <param name="use_ref" type="select" label="Use a reference genome?" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
                        <option value="true">Yes</option>
                        <option value="false" selected="true">No</option>
                    </param>
                    <when value="true">
                        <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
                        <param argument="--features" type="data" format="gff,gff3,bed" optional="true" label="Genomic feature positions in the reference genome" help="Gene coordinates for the reference genome"/>
                        <param argument="--operons" type="data" format="gff,gff3,bed" optional="true" label="Operon positions in the reference genome" help="Operon coordinates for the reference genome"/>
                        <!-- The select value is the flag itself; the empty value means the flag is omitted. -->
                        <conditional name="k_mer">
                            <param argument="--k-mer-stats" type="select" label="Compute k-mer-based quality metrics?" help="It is recommended for large genomes. This may significantly increase memory and time consumption on large genomes">
                                <option value="--k-mer-stats">Yes</option>
                                <option value="" selected="true">No</option>
                            </param>
                            <when value="--k-mer-stats">
                                <param name="k_mer_size" argument="--k-mer-size" type="integer" value="101" label="Size of k" />
                            </when>
                            <when value=""/>
                        </conditional>
                        <param argument="--circos" type="boolean" truevalue="--circos" falsevalue="" checked="false" label="Generate Circos plot" help="Plot Circos version of Icarus contig alignment viewer"/>
                    </when>
                    <when value="false">
                        <param argument="--est-ref-size" type="integer" optional="true" label="Estimated reference genome size (in bp) for computing NGx statistics" help=""/>
                    </when>
                </conditional>
                <!-- The select value is the flag itself; the empty value keeps the prokaryote default. -->
                <param name="orga_type" type="select" label="Type of organism">
                    <option value="">Prokaryotes: use of GeneMarkS for gene finding (default)</option>
                    <option value="--eukaryote">Eukaryote: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding  (--eukaryote)</option>
                    <option value="--fungus">Fungus: use of GeneMark-ES for gene finding, Barrnap for ribosomal RNA genes prediction, BUSCO for conserved orthologs finding (--fungus)</option>
                </param>
            </when>
            <when value="metagenome">
                <conditional name="ref">
                    <param name="origin" type="select" label="Reference genome" help="Many metrics can't be evaluated without a reference. If this is omitted, QUAST will only report the metrics that can be evaluated without a reference.">
                        <option value="history">From history</option>
                        <option value="list">From list</option>
                        <option value="silva">From SILVA database</option>
                        <option value="none" selected="true">None</option>
                    </param>
                    <when value="history">
                        <param argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
                    </when>
                    <when value="list">
                        <param name="references_list" argument="--references-list" type="text" value="" label="Comma-separated list of reference genomes" help="MetaQUAST will search for these references in the NCBI database and will download the found ones"/>
                    </when>
                    <when value="silva">
                        <param name="max_ref_num" argument="--max-ref-num" type="integer" value="50" label="Maximum number of reference genomes (per each assembly) to download after searching in the SILVA database" />
                    </when>
                    <when value="none"/>
                </conditional>
            </when>
        </conditional>
        <!-- General options passed to both quast and metaquast. The two booleans render as their
             flag when checked and as an empty string otherwise. -->
        <param argument="--min-contig" type="integer" value="500" label="Lower threshold for a contig length (in bp)" help="Shorter contigs won't be taken into account"/>
        <param argument="--split-scaffolds" type="boolean" truevalue="--split-scaffolds" falsevalue="" checked="false" label="Are assemblies scaffolds rather than contigs?" help="QUAST will add split versions of assemblies to the comparison. Assemblies are split by continuous fragments of N's of length >= 10. If broken version is equal to the original assembly (i.e. nothing was split) it is not included in the comparison."/>
        <param argument="--large" type="boolean" truevalue="--large" falsevalue="" checked="false" label="Is genome large (> 100 Mbp)?" help="Use optimal parameters for evaluation of large genomes. Affects speed and accuracy. In particular, imposes --eukaryote --min-contig 3000 --min-alignment 500 --extensive-mis-size 7000 (can be overridden manually with the corresponding options). In addition, this mode tries to identify misassemblies caused by transposable elements and exclude them from the number of misassemblies."/>
        <!-- Gene, rRNA and conserved-ortholog prediction switches. The "tool" select value is the
             flag emitted on the command line ("none" emits nothing). The gene_thresholds macro is
             expanded only for the gene-finding and glimmer cases; it is presumably defined in the
             imported macros.xml (not visible here). -->
        <section name="genes" title="Genes">
            <conditional name="gene_finding">
                <param name="tool" type="select" label="Tool for gene prediction" help="">
                    <option value="none">Don't predict genes</option>
                    <option value="--gene-finding">GeneMarkS if prokaryotes or GeneMark-ES if eukaryotes or fungi</option>
                    <option value="--mgm">MetaGeneMark, specially for metagenomic assembly</option>
                    <option value="--glimmer">Glimmer</option>
                </param>
                <when value="none"/>
                <when value="--gene-finding">
                    <expand macro="gene_thresholds"/>
                </when>
                <when value="--mgm"/>
                <when value="--glimmer">
                    <expand macro="gene_thresholds"/>
                </when>
            </conditional>
            <param argument="--rna-finding" type="boolean" truevalue="--rna-finding" falsevalue="" checked="false" label="Enables ribosomal RNA gene finding?" help="By default, we assume that the genome is prokaryotic, and Barrnap uses the bacterial database for rRNA prediction. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force Barrnap to work with the eukaryotic (fungal) database. "/>
            <param argument="--conserved-genes-finding" type="boolean" truevalue="--conserved-genes-finding" falsevalue="" checked="false" label="Enables search for Universal Single-Copy Orthologs using BUSCO?" help="By default, we assume that the genome is prokaryotic, and BUSCO uses the bacterial database of orthologs. If the genome is eukaryotic (fungal), use --eukaryote (--fungus) option to force BUSCO to work with the eukaryotic (fungal) database. "/>
        </section>
        <!-- Alignment filtering and upper-bound-assembly options. Boolean parameters render as
             their flag when checked and as an empty string otherwise. -->
        <section name="alignments" title="Alignments">
            <param argument="--use-all-alignments" type="boolean" truevalue="--use-all-alignments" falsevalue="" checked="false" label="Use all alignments as in QUAST v.1.*. to compute genome fraction, # genomic features, # operons metrics?" help="By default, QUAST v.2.0 and higher filters out ambiguous and redundant alignments, keeping only one alignment per contig (or one set of non-overlapping or slightly overlapping alignments)"/>
            <param argument="--min-alignment" type="integer" value="65" label="Minimum length of alignment" help="Alignments shorter than this value will be filtered. Note that all alignments shorter than 65 bp will be filtered regardless of this threshold."/>
            <param argument="--min-identity" type="float" value="95.0" label="Minimum IDY% considered as proper alignment" help="Alignments with IDY% worse than this value will be filtered. Note that all alignments with IDY% less than 80.0% will be filtered regardless of this threshold. "/>
            <param argument="--ambiguity-usage" type="select" label="How to process equally good alignments of a contig (probably repeats)?" help="">
                <option value="none">Skip all such alignments</option>
                <option value="one" selected="true">Take only one (the very best one)</option>
                <option value="all">Use all alignments. It can cause a significant increase of # mismatches (repeats are almost always inexact due to accumulated SNPs, indels, etc.). It is useful for metagenomic assemblies where ambiguous alignments might represent homologous sequences of different strains</option>
            </param>
            <param argument="--ambiguity-score" type="float" value="0.99" min="0.8" max="1.0" label="Score S for defining equally good alignments of a single contig" help="All alignments are sorted by decreasing LEN × IDY% value. All alignments with LEN × IDY% less than S × best(LEN × IDY%) are discarded. "/>
            <param argument="--fragmented" type="boolean" truevalue="--fragmented" falsevalue="" checked="false" label="Fragmented reference genome" help="Reference genome is fragmented (e.g. a scaffold reference). QUAST will try to detect misassemblies caused by the fragmentation and mark them fake (will be excluded from misassemblies). Note: QUAST will not detect misassemblies caused by the linear representation of circular genome "/>
            <param argument="--upper-bound-assembly" type="boolean" truevalue="--upper-bound-assembly" falsevalue="" label="Simulate upper bound assembly" help="Simulate upper bound assembly based on the reference genome and a given set of reads (mate-pairs or long reads, such as Pacbio SMRT/Oxford Nanopore, are REQUIRED). This assembly is added to the comparison and could be useful for estimating the upper bounds of completeness and contiguity that theoretically can be reached by assembly software from this particular set of reads. The concept is based on the fact that the reference genome cannot be completely reconstructed from raw reads due to long genomic repeats and low covered regions." />
            <!-- Optional: when left empty the flag is not emitted and QUAST applies its own default. -->
            <param argument="--upper-bound-min-con" type="integer" value="" optional="true" label="Minimal number of 'connecting reads' needed for joining upper bound contigs into a scaffold" help="This is important for a realistic estimation of genome assembly fragmentation due to long repeats. The default value is 2 for mate-pairs and 1 for long reads (PacBio or Nanopore libraries)"/>
        </section>
        <!-- Advanced misassembly / threshold options. -->
        <section name="advanced" title="Advanced options">
            <param argument="--contig-thresholds" type="text" value="0,1000" label="Comma-separated list of contig length thresholds (in bp)" help="Used in # contigs ≥ x and total length (≥ x) metrics"/>
            <param argument="--strict-NA" type="boolean" truevalue="--strict-NA" falsevalue="" checked="false" label="Break contigs at every misassembly event (including local ones) to compute NAx and NGAx statistics?" help="By default, QUAST breaks contigs only at extensive misassemblies (not local ones)."/>
            <param argument="--extensive-mis-size" type="integer" value="1000" min="85" label="Lower threshold for the relocation size (gap or overlap size between left and right flanking sequence)" help="Shorter relocations are considered as local misassemblies. It does not affect other types of extensive misassemblies (inversions and translocations). The default value is 1000 bp. Note that the threshold should be greater than maximum indel length which is 85 bp."/>
            <!-- NOTE(review): the pre-filled value (1000) differs from the QUAST default quoted in the help (10000); the value is left untouched, confirm which one is intended. -->
            <param argument="--scaffold-gap-max-size" type="integer" value="1000" label="Max allowed scaffold gap length difference for detecting corresponding type of misassemblies" help="Longer inconsistencies are considered as relocations and thus, counted as extensive misassemblies. QUAST's own default value is 10000 bp (this form is pre-filled with 1000 bp). Note that the threshold makes sense only if it is greater than extensive misassembly size"/>
            <param argument="--unaligned-part-size" type="integer" value="500" label="Lower threshold for detecting partially unaligned contigs" help=""/>
            <!-- Inverted boolean: checked (the default) emits nothing, unchecked emits the skip flag. -->
            <param argument="--skip-unaligned-mis-contigs" type="boolean" truevalue="" falsevalue="--skip-unaligned-mis-contigs" checked="true" label="Distinguish contigs with more than 50% unaligned bases as a separate group of contigs?" help="By default, QUAST treats such contigs as a separate group and does not count misassemblies in them. Answer No to pass --skip-unaligned-mis-contigs and disable this distinction."/>
            <param argument="--fragmented-max-indent" type="integer" min="0" value="" optional="true" label="Fragment max indent" help="Mark translocation as fake if both alignments are located no further than N bases from the ends of the reference fragments. The value should be less than extensive misassembly size. Default value is 50. Note: requires --fragmented option" />
        </section>
        <!-- Checkbox list of report types to keep; optional, so every box may be left unticked.
             Only the HTML report is selected by default. -->
        <param name="output_files" type="select" display="checkboxes" optional="true" multiple="true" label="Output files">
            <option value="html" selected="true">HTML report</option>
            <option value="pdf">PDF report</option>
            <option value="tabular">Tabular reports</option>
            <option value="log">Log file</option>
        </param>
    </inputs>
    <!-- Report datasets collected from outputdir. A dataset is created only when all of its
         filters evaluate to true. Filters on output_files first check that the selection is
         non-empty: the select is optional, so its value is expected to be None when no box is
         ticked (TODO confirm), and a bare "in" test would then raise instead of returning false. -->
    <outputs>
        <data name="quast_tabular" format="tabular" label="${tool.name} on ${on_string}: tabular report" from_work_dir="outputdir/report.tsv">
            <filter>output_files and 'tabular' in output_files</filter>
        </data>
        <data name="report_html" format="html" label="${tool.name} on ${on_string}:  HTML report" from_work_dir="outputdir/report.html">
            <filter>output_files and 'html' in output_files</filter>
        </data>
        <data name="report_pdf" format="pdf" label="${tool.name} on ${on_string}:  PDF report" from_work_dir="outputdir/report.pdf">
            <filter>output_files and 'pdf' in output_files</filter>
        </data>
        <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outputdir/quast.log">
            <filter>output_files and 'log' in output_files</filter>
        </data>
        <!-- The reports below are only declared for genome assemblies evaluated against a reference. -->
        <data name="mis_ass" format="tabular" label="${tool.name} on ${on_string}: Misassemblies report" from_work_dir="outputdir/contigs_reports/misassemblies_report.txt">
            <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter>
            <filter>output_files and 'tabular' in output_files</filter>
        </data>
        <data name="unalign" format="tabular" label="${tool.name} on ${on_string}: Unaligned contigs report" from_work_dir="outputdir/contigs_reports/unaligned_report.tsv">
            <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true'</filter>
            <filter>output_files and 'tabular' in output_files</filter>
        </data>
        <data name="kmers" format="tabular" label="${tool.name} on ${on_string}: K-mer-based metrics report" from_work_dir="outputdir/k_mer_stats/kmers_report.txt">
            <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['k_mer']['k_mer_stats'] != ''</filter>
            <filter>output_files and 'tabular' in output_files</filter>
        </data>
        <!-- The Circos plot does not depend on output_files, only on the circos checkbox. -->
        <data name="circos_output" format="png" from_work_dir="outputdir/circos/circos.png" label="${tool.name} on ${on_string}: Circos plot">
            <filter>assembly['type'] == 'genome' and assembly['ref']['use_ref'] == 'true' and assembly['ref']['circos']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 01: reference, genes annotations and operon coordinates -->
        <test expect_num_outputs="2">
            <conditional name="in">
                <param name="custom" value="true"/>
                <repeat name="inputs">
                    <param name="input" value="contigs1.fna"/>
                    <param name="labels" value="contig1"/>
                </repeat>
                <repeat name="inputs">
                    <param name="input" value="contigs2.fna"/>
                    <param name="labels" value="contig2"/>
                </repeat>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="true"/>
                    <param name="r" value="reference.fna"/>
                    <param name="features" value="genes.gff"/>
                    <param name="operons" value="operons.bed"/>
                    <conditional name="k_mer">
                        <param name="k_mer_stats" value="--k-mer-stats"/>
                        <param name="k_mer_size" value="101" />
                    </conditional>
                    <param name="circos" value="true"/>
                </conditional>
                <param name="orga_type" value=""/>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <!-- The value must equal one of the option values declared for the "tool" select (hyphen, not underscore). -->
                    <param name="tool" value="--gene-finding"/>
                    <param name="gene_thresholds" value="0,300,1500,3000"/>
                </conditional>
                <param name="rna_finding" value="true"/>
                <param name="conserved_genes_finding" value="true"/>
            </section>
            <section name="alignments">
                <param name="use_all_alignments" value="true"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
            </section>
            <section name="advanced">
                <param name="contig_thresholds" value="0,1000"/>
                <param name="strict_NA" value="true"/>
                <param name="extensive_mis_size" value="1000"/>
                <param name="scaffold_gap_max_size" value="1000"/>
                <param name="unaligned_part_size" value="500"/>
                <param name="skip_unaligned_mis_contigs" value="true"/>
                <param name="fragmented_max_indent" value="50"/>
            </section>
            <param name="output_files" value="html"/>
            <output name="report_html" file="test1_report.html" ftype="html" compare="sim_size"/>
            <output name="circos_output" file="test1_circos.png" ftype="png"/>
        </test>
        <!-- Test 02: all outputs -->
        <test expect_num_outputs="8">
            <conditional name="in">
                <param name="custom" value="true"/>
                <repeat name="inputs">
                    <param name="input" value="contigs1.fna"/>
                    <param name="labels" value="contig1"/>
                </repeat>
                <repeat name="inputs">
                    <param name="input" value="contigs2.fna"/>
                    <param name="labels" value="contig2"/>
                </repeat>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="true"/>
                    <param name="r" value="reference.fna"/>
                    <param name="features" value="genes.gff"/>
                    <param name="operons" value="operons.bed"/>
                    <conditional name="k_mer">
                        <param name="k_mer_stats" value="--k-mer-stats"/>
                        <param name="k_mer_size" value="101" />
                    </conditional>
                    <param name="circos" value="true"/>
                </conditional>
            </conditional>
            <param name="output_files" value="html,pdf,tabular,log"/>
            <output name="report_html" file="test2_report.html" ftype="html" compare="sim_size"/>
            <output name="report_pdf" file="test2_report.pdf" ftype="pdf" compare="sim_size"/>
            <output name="quast_tabular" file="test2_report.tab" ftype="tabular"/>
            <output name="log" file="test2.log" ftype="txt" compare="sim_size"/>
            <output name="mis_ass" file="test2_missasemblies.tab" ftype="tabular"/>
            <output name="unalign" file="test2_unaligned.tab" ftype="tabular"/>
            <output name="kmers" file="test2_kmers.tab" ftype="tabular"/>
            <output name="circos_output" file="test2_circos.png" ftype="png" compare="sim_size"/>
        </test>
        <!-- Test 03: without reference -->
        <test expect_num_outputs="3">
            <conditional name="in">
                <param name="custom" value="false"/>
                <param name="inputs" value="contigs1.fna,contigs2.fna"/>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="false"/>
                </conditional>
                <param name="orga_type" value="--eukaryote"/>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <param name="large" value="false"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <param name="tool" value="none"/>
                </conditional>
                <param name="rna_finding" value="false"/>
                <param name="conserved_genes_finding" value="false"/>
            </section>
            <section name="alignments">
                <param name="use_all_alignments" value="false"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
                <param name="fragmented" value="false"/>
            </section>
            <section name="advanced">
                <param name="contig_thresholds" value="0,1000, 500"/>
                <param name="strict_NA" value="false"/>
                <param name="extensive_mis_size" value="1000"/>
                <param name="scaffold_gap_max_size" value="1000"/>
                <param name="unaligned_part_size" value="500"/>
                <param name="skip_unaligned_mis_contigs" value="-"/>
            </section>
            <param name="output_files" value="html,pdf,log" />
            <output name="log" file="test3.log" ftype="txt" compare="sim_size"/>
            <output name="report_html" file="test3_report.html" compare="sim_size"/>
            <output name="report_pdf" file="test3_report.pdf" compare="sim_size"/>
        </test>
        <!-- Test 04: metagenomics.
             Sets the assembly type to "metagenome"; for any type other than
             "genome" the command template invokes metaquast instead of quast.
             A single assembly (contigs3.fasta) is supplied with custom labels
             disabled, so its label is derived from the dataset's element
             identifier. Only the tabular report is requested, and it is
             checked against test4.tab. -->
        <test>
            <conditional name="in">
                <param name="custom" value="false"/>
                <param name="inputs" value="contigs3.fasta"/>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="metagenome"/>
                <conditional name="ref">
                    <!-- The command template only writes a temporary reference
                         list file when origin is "list"; with "none" that step
                         is skipped. -->
                    <param name="origin" value="none"/>
                </conditional>
            </conditional>
            <param name="min_contig" value="500"/>
            <param name="split_scaffolds" value="false"/>
            <param name="large" value="false"/>
            <section name="genes">
                <conditional name="gene_finding">
                    <param name="tool" value="--mgm"/>
                </conditional>
                <param name="rna_finding" value="false"/>
                <param name="conserved_genes_finding" value="false"/>
            </section>
            <section name="alignments">
                <param name="use_all_alignments" value="false"/>
                <param name="min_alignment" value="65"/>
                <param name="min_identity" value="95.0"/>
                <param name="ambiguity_usage" value="one"/>
                <param name="ambiguity_score" value="0.99"/>
                <param name="fragmented" value="false"/>
            </section>
            <section name="advanced">
                <param name="contig_thresholds" value="0,1000, 500"/>
                <param name="strict_NA" value="false"/>
                <param name="extensive_mis_size" value="1000"/>
                <param name="scaffold_gap_max_size" value="1000"/>
                <param name="unaligned_part_size" value="500"/>
                <param name="skip_unaligned_mis_contigs" value="-"/>
            </section>
            <param name="output_files" value="tabular"/>
            <output name="quast_tabular" file="test4.tab" ftype="tabular"/>
        </test>
        <!-- Test 05: FASTQ read files.
             Two assemblies are supplied with explicit labels (custom = true),
             together with four PacBio FASTQ files as reads; for the "pacbio"
             reads option the command template passes every file in input_1
             with the pacbio reads flag. The assembly type is "genome" (quast is
             invoked) and a reference is enabled (use_ref = true,
             r = reference.fna). Three outputs are expected: the tabular report
             plus the mis_ass and unalign outputs. -->
        <test expect_num_outputs="3">
            <conditional name="in">
                <param name="custom" value="true"/>
                <repeat name="inputs">
                    <param name="input" value="contigs1.fna"/>
                    <param name="labels" value="contig1"/>
                </repeat>
                <repeat name="inputs">
                    <param name="input" value="contigs2.fna"/>
                    <param name="labels" value="contig2"/>
                </repeat>
            </conditional>
            <conditional name="reads">
                <param name="reads_option" value="pacbio"/>
                <param name="input_1" value="pacbio_01.fastq,pacbio_02.fastq,pacbio_03.fastq,pacbio_04.fastq"/>
            </conditional>
            <conditional name="assembly">
                <param name="type" value="genome"/>
                <conditional name="ref">
                    <param name="use_ref" value="true"/>
                    <param name="r" value="reference.fna"/>
                </conditional>
            </conditional>
            <section name="alignments">
                <param name="upper_bound_assembly" value="true"/>
                <param name="upper_bound_min_con" value="1"/>
            </section>
            <param name="output_files" value="tabular"/>
            <output name="quast_tabular" file="test5.tab" ftype="tabular"/>
            <output name="mis_ass" file="test5_missasemblies.tab" ftype="tabular"/>
            <output name="unalign" file="test5_unaligned.tab" ftype="tabular"/>
        </test>
        <!-- Test 06: FASTQ.gz read files.
             Two assemblies are supplied with explicit labels (custom = true),
             together with two gzip-compressed FASTQ files given as single
             reads (reads_option = single); the command template passes every
             file in input_1 with the single reads flag. The assembly
             conditional is not set here, so it is presumably left at the tool
             defaults. Exactly one output, the tabular report, is expected. -->
        <test expect_num_outputs="1">
            <conditional name="in">
                <param name="custom" value="true"/>
                <repeat name="inputs">
                    <param name="input" value="contigs1.fna"/>
                    <param name="labels" value="contig1"/>
                </repeat>
                <repeat name="inputs">
                    <param name="input" value="contigs2.fna"/>
                    <param name="labels" value="contig2"/>
                </repeat>
            </conditional>
            <conditional name="reads">
                <param name="reads_option" value="single"/>
                <param name="input_1" value="pacbio_01.fastq.gz,pacbio_02.fastq.gz"/>
            </conditional>
            <param name="output_files" value="tabular"/>
            <output name="quast_tabular" file="test6.tab" ftype="tabular"/>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

QUAST = QUality ASsessment Tool. The tool evaluates genome assemblies by computing various metrics.

If you have one or multiple genome assemblies, you can assess their quality with Quast. It works with or without a reference genome. If you are new to Quast, start by reading its `manual page <http://quast.sourceforge.net/docs/manual.html>`_.

**Using Quast without reference**

Without a reference, Quast can calculate a number of assembly-related metrics but cannot provide any information about potential misassemblies, inversions, translocations, etc. Suppose you have three assemblies produced by Unicycler corresponding to three different antibiotic treatments *car*, *pit*, and *cef* (these stand for carbenicillin, piperacillin, and cefsulodin, respectively). Evaluating them without reference will produce the following Quast outputs:

 * Quast report in HTML format
 * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
 * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format
 * Quast log (a file with technical information about Quast tool execution)

The **tab delimited Quast report** will contain the following information::

 Assembly                  pit_fna cef_fna car_fna
 # contigs (>= 0 bp)           100      91      94
 # contigs (>= 1000 bp)         62      58      61
 Total length (>= 0 bp)    6480635 6481216 6480271
 Total length (>= 1000 bp) 6466917 6468946 6467103
 # contigs                      71      66      70
 Largest contig             848753  848766  662053
 Total length              6473173 6474698 6473810
 GC (%)                      66.33   66.33   66.33
 N50                        270269  289027  254671
 N75                        136321  136321  146521
 L50                             7       7       8
 L75                            15      15      16
 # N's per 100 kbp            0.00    0.00    0.00

where values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_

**Quast report in HTML format** contains graphs in addition to the above metrics, while **Contig viewer** draws contigs ordered from longest to shortest. This ordering is suitable for comparing only largest contigs or number of contigs longer than a specific threshold. The viewer shows N50 and N75 with color and textual indication. If the reference genome is available or at least approximate genome length is known (see `--est-ref-size`), NG50 and NG75 are also shown. You can also tone down contigs shorter than a specified threshold using Icarus control panel:

.. image:: $PATH_TO_IMAGES/contig_view_noR.png
   :width: 558
   :height: 412

Also see `Plot description <http://quast.sourceforge.net/docs/manual.html#sec2>`_ section of the manual.

**Using Quast with reference**

Car, pit, and cef are in fact assemblies of *Pseudomonas aeruginosa* UCBPP-PA14, so we can use its genome as a reference (by supplying a Fasta file containing the *P. aeruginosa* PA14 genome to the **Reference genome** input box). The following outputs will be produced (note the alignment viewer):

 * Quast report in HTML format
 * `Contig viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
 * `Alignment viewer <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_ (an HTML file)
 * `Quast report <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_ in Tab-delimited format
 * Summary of `misassemblies <http://quast.sourceforge.net/docs/manual.html#sec3.1.2>`_
 * Summary of `unaligned contigs <http://quast.sourceforge.net/docs/manual.html#sec3.1.3>`_
 * Quast log (a file with technical information about Quast tool execution)

With the reference Quast produces a much more comprehensive set of results::

 Assembly                  pit_fna cef_fna car_fna
 # contigs (>= 0 bp)           100      91      94
 # contigs (>= 1000 bp)         62      58      61
 Total length (>= 0 bp)    6480635 6481216 6480271
 Total length (>= 1000 bp) 6466917 6468946 6467103
 # contigs                      71      66      70
 Largest contig             848753  848766  662053
 Total length              6473173 6474698 6473810
 Reference length          6537648 6537648 6537648
 GC (%)                      66.33   66.33   66.33
 Reference GC (%)            66.29   66.29   66.29
 N50                        270269  289027  254671
 NG50                       270269  289027  254671
 N75                        136321  136321  146521
 NG75                       136321  136321  136321
 L50                             7       7       8
 LG50                            7       7       8
 L75                            15      15      16
 LG75                           15      15      17
 # misassemblies                 0       0       0
 # misassembled contigs          0       0       0
 Misassembled contigs length     0       0       0
 # local misassemblies           1       1       2
 # unaligned mis. contigs        0       0       0
 # unaligned contigs         0 + 0   0 + 0   0 + 0
                              part    part    part
 Unaligned length                0       0       0
 Genome fraction (%)        99.015  99.038  99.025
 Duplication ratio           1.000   1.000   1.000
 # N's per 100 kbp            0.00    0.00    0.00
 # mismatches per 100 kbp     3.82    3.63    3.49
 # indels per 100 kbp         1.19    1.13    1.13
 Largest alignment          848753  848766  662053
 Total aligned length      6473163 6474660 6473792
 NA50                       270269  289027  254671
 NGA50                      270269  289027  254671
 NA75                       136321  136321  146521
 NGA75                      136321  136321  136321
 LA50                            7       7       8
 LGA50                           7       7       8
 LA75                           15      15      16
 LGA75                          15      15      17

where, again, values are defined as specified in `Quast manual <http://quast.sourceforge.net/docs/manual.html#sec3.1.1>`_. You can see that this report includes a variety of data that can only be computed against a reference assembly.

Using reference also produces an **Alignment viewer**:

.. image:: $PATH_TO_IMAGES/Align_view.png
   :width: 515
   :height: 395

Alignment viewer highlights regions of interest such as, in this case, misassemblies that can potentially point to genome rearrangements (see more `here <http://quast.sourceforge.net/docs/manual.html#sec3.4>`_).

    ]]>
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Mon, 08 Nov 2021 11:26:22 +0000
parents	45924fa8d8c5
children	7594365c546b