Mercurial > repos > iuc > coverm_genome

<tool id="coverm_genome" name="CoverM genome" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Calculate coverage of individual genomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <command><![CDATA[
#import re

## Path accumulators that are later turned into coverm arguments.
## Only genome_fp is appended to in this file; the other lists are presumably
## filled by the macro tokens from macros.xml used below - TODO confirm.
#set $single_fp = []
#set $fw_fp = []
#set $rv_fp = []
#set $interl_fp = []
#set $ref_fp = []
#set $bam_fp = []
#set $genome_fp = []

## Working directories for the different input kinds.
mkdir 'single/' &&
mkdir 'fw/' &&
mkdir 'rv/' &&
mkdir 'interl/' &&
mkdir 'ref/' &&
mkdir 'bam/' &&

## Branch 1: reads are already mapped (BAM input).
## Branch 2: reads still have to be mapped, per sample or as co-assembly.
#if $mapped.mapped == 'mapped'
    @BAMS@
    #if $mapped.genome.ref_or_genome == 'genomic'
## History genomes are symlinked under a sanitised name: every character of the
## element identifier that is not whitespace, a word character, '-', a backslash
## or '.' is replaced by '_'. Other sources are appended to genome_fp unchanged.
        #if $mapped.genome.genomic.source == 'history'
            #for $i, $genome in enumerate($mapped.genome.genomic.genome_fasta_files)
                #set $fn = re.sub('[^\s\w\-\\.]', '_', str($genome.element_identifier))
                #silent $genome_fp.append( $fn )
ln -s '$genome' '$fn' &&
            #end for
        #else
            #for $genome in $mapped.genome.genomic.genome_fasta_files
                #silent $genome_fp.append($genome)
            #end for
        #end if
    #end if
#else
    #if $mapped.mode.mode == 'individual'
        @INDIVIDUAL_ASSEMBLY_READS@
        #if $mapped.mode.genome.ref_or_genome == 'genomic'
            @GENOME_FOR_READS@
        #else
## The ref variable is set for the macro token that follows (presumably read there).
            #set $ref = $mapped.mode.genome.reference
            @INDIVIDUAL_ASSEMBLY_REF@
        #end if
    #else
        @CO_ASSEMBLY_ALL_READS@
        #if $mapped.mode.genome.ref_or_genome == 'genomic'
            @GENOME_FOR_READS@
        #else
## The refs variable is set for the macro token that follows (presumably read there).
            #set $refs = $mapped.mode.genome.reference
            @CO_ASSEMBLY_REF@
        #end if
    #end if
#end if

## Directory that receives copies of the dereplicated representative genomes;
## the representative_fasta output collection is discovered from it.
mkdir 'representative-fasta/' &&
coverm
    genome
#if $mapped.mapped == 'mapped'
## Pre-mapped input: pass every collected BAM path.
    -b
    #for $bam in $bam_fp
        '$bam'
    #end for
    #if $mapped.genome.ref_or_genome == 'contigs'
## Genomes are defined from the contigs in the BAM files. The single_genome
## value is emitted as-is; when it is empty, a genome definition file or a
## separator may be given instead.
    $mapped.genome.cond_single_genome.single_genome
        #if $mapped.genome.cond_single_genome.single_genome == ''
            #if $mapped.genome.cond_single_genome.genome_contig_definition.choice == 'genome-definition'
    --genome-definition '$mapped.genome.cond_single_genome.genome_contig_definition.genome_definition'
            #else if $mapped.genome.cond_single_genome.genome_contig_definition.choice == 'separator'
    --separator '$mapped.genome.cond_single_genome.genome_contig_definition.separator'
            #end if
        #end if
    $mapped.sharded
    #else
## Genomes are defined by one FASTA file each.
    --genome-fasta-files
        #for $genome in $genome_fp
        '$genome'
        #end for
    #end if
#else
## Unmapped input: exactly one read layout is passed, checked in the order
## paired (forward/reverse), single, interleaved.
    #if $fw_fp
    -1
        #for $read in $fw_fp
        '$read'
        #end for
    -2
        #for $read in $rv_fp
        '$read'
        #end for
    #else if $single_fp
    --single
        #for $read in $single_fp
        '$read'
        #end for
    #else if $interl_fp
    --interleaved
        #for $read in $interl_fp
        '$read'
        #end for
    #end if

    --mapper '$mapped.mapper'
    #if $mapped.mode.genome.ref_or_genome == 'contigs'
## Genomes are defined from the contigs of the reference(s) the reads are mapped to.
    --reference
        #for $ref in $ref_fp
        '$ref'
        #end for
    $mapped.mode.genome.cond_single_genome.single_genome
        #if $mapped.mode.genome.cond_single_genome.single_genome == ''
            #if $mapped.mode.genome.cond_single_genome.genome_contig_definition.choice == 'genome-definition'
    --genome-definition '$mapped.mode.genome.cond_single_genome.genome_contig_definition.genome_definition'
            #else if $mapped.mode.genome.cond_single_genome.genome_contig_definition.choice == 'separator'
    --separator '$mapped.mode.genome.cond_single_genome.genome_contig_definition.separator'
            #end if
        #end if
## The sharded value is only read in co-assembly mode.
        #if $mapped.mode.mode == 'co'
    $mapped.mode.genome.sharded
        #end if
    #else
## Genomes are defined by one FASTA file each.
    --genome-fasta-files
        #for $genome in $genome_fp
        '$genome'
        #end for
    #end if
#end if

    $exclude_genomes_from_deshard
## Read-alignment thresholds. The pair-specific thresholds are only passed
## when the proper-pairs-only option is switched on.
    --min-read-aligned-length $alignment.min_read_aligned_length
    --min-read-percent-identity $alignment.min_read_percent_identity
    --min-read-aligned-percent $alignment.min_read_aligned_percent
    $alignment.proper_pairs_only.proper_pairs_only
#if $alignment.proper_pairs_only.proper_pairs_only != ''
    --min-read-aligned-length-pair $alignment.proper_pairs_only.min_read_aligned_length_pair
    --min-read-percent-identity-pair $alignment.proper_pairs_only.min_read_percent_identity_pair
    --min-read-aligned-percent-pair $alignment.proper_pairs_only.min_read_aligned_percent_pair
#end if

## Coverage calculation: one quoted value per selected method.
    --methods
#for $method in $cov.methods
        '$method'
#end for
    --min-covered-fraction $cov.min_covered_fraction
    --contig-end-exclusion $cov.contig_end_exclusion
    --trim-min $cov.trim_min
    --trim-max $cov.trim_max

## Dereplication: the tuning parameters are only passed when dereplication is enabled.
    $derep.dereplicate.dereplicate
#if $derep.dereplicate.dereplicate != ''
    --dereplication-ani $derep.dereplicate.dereplication_ani
    --dereplication-aligned-fraction $derep.dereplicate.dereplication_aligned_fraction
    --dereplication-fragment-length $derep.dereplicate.dereplication_fragment_length
    --dereplication-prethreshold-ani $derep.dereplicate.dereplication_prethreshold_ani
    --dereplication-quality-formula '$derep.dereplicate.dereplication_quality_formula'
    --dereplication-precluster-method '$derep.dereplicate.dereplication_precluster_method'
#end if
## Optional genome quality inputs and filters; each is skipped when left empty.
#if $derep.checkm_tab_table
    --checkm-tab-table '$derep.checkm_tab_table'
#end if
#if $derep.genome_info:
    --genome-info '$derep.genome_info'
#end if
#if $derep.min_completeness != "":
    --min-completeness $derep.min_completeness
#end if
#if $derep.max_contamination != "":
    --max-contamination $derep.max_contamination
#end if

    --output-format '$out.output_format'
    --output-file '$output'
    $out.no_zeros
## The dereplication outputs are filtered out of the job unless dereplication is
## enabled, so the matching coverm options are only requested in that case
## (same condition as the output filters and the sed step below).
#if $derep.dereplicate.dereplicate and $out.dereplication_output_cluster_definition
    --dereplication-output-cluster-definition '$cluster_definition'
#end if
#if $derep.dereplicate.dereplicate and $out.dereplication_output_representative_fasta_directory_copy
    --dereplication-output-representative-fasta-directory-copy './representative-fasta/'
#end if
    --threads \${GALAXY_SLOTS:-1}

## Post-process the cluster definition: drop every 'genomes/' occurrence and
## every '.fna' occurrence so that only the genome names remain.
#if $derep.dereplicate.dereplicate and $out.dereplication_output_cluster_definition
    && sed -i -e 's@genomes/@@g; s/\.fna//g' '$cluster_definition'
#end if
    ]]></command>
    <inputs>
        <!-- Top-level switch between already mapped input (BAM files) and
             unmapped reads that are passed to coverm together with a mapper.
             Most parameters are expanded from macros.xml. -->
        <conditional name="mapped">
            <expand macro="mapped"/>
            <when value="mapped">
                <expand macro="mapped_params"/>
                <!-- How genomes are delimited: by contigs in the BAM file(s) or by one FASTA file per genome. -->
                <conditional name="genome">
                    <param name="ref_or_genome" type="select" label="Genome definition">
                        <option value="contigs" selected="true">From contigs in the BAM file(s)</option>
                        <option value="genomic">From FASTA files with each genome</option>
                    </param>
                    <when value="contigs">
                        <expand macro="cond_single_genome"/>
                    </when>
                    <when value="genomic">
                        <expand macro="genomic"/>
                    </when>
                </conditional>
            </when>
            <when value="not-mapped">
                <!-- Assembly mode: "individual" or "co" (co-assembly). -->
                <conditional name="mode">
                    <expand macro="assembly_mode"/>
                    <when value="individual">
                        <expand macro="individual_assembly_reads"/>
                        <conditional name="genome">
                            <expand macro="ref_or_genome"/>
                            <when value="contigs">
                                <expand macro="individual_assembly_reference"/>
                                <expand macro="cond_single_genome"/>
                            </when>
                            <when value="genomic">
                                <expand macro="genomic"/>
                            </when>
                        </conditional>
                    </when>
                    <when value="co">
                        <!-- One mandatory read input plus any number of additional ones. -->
                        <expand macro="co_assembly_reads"/>
                        <repeat name="extra_reads" title="Additional reads">
                            <expand macro="co_assembly_reads"/>
                        </repeat>
                        <conditional name="genome">
                            <expand macro="ref_or_genome"/>
                            <when value="contigs">
                                <expand macro="co_assembly_reference"/>
                                <expand macro="cond_single_genome"/>
                            </when>
                            <when value="genomic">
                                <expand macro="genomic"/>
                            </when>
                        </conditional>
                    </when>
                </conditional>
                <expand macro="mapping"/>
            </when>
        </conditional>
        <!-- NOTE(review): the help text below describes a newline-separated file of
             genome names, but this input is a boolean and the command template only
             emits the bare flag. Confirm against the CoverM documentation whether a
             file input is needed here. -->
        <param argument="--exclude-genomes-from-deshard" type="boolean" truevalue="--exclude-genomes-from-deshard" falsevalue="" checked="false" label="Exclude genomes from deshard"
            help="Ignore genomes whose name appears in this newline-separated file when combining shards." />
        <expand macro="alignment"/>
        <!-- Coverage calculation settings. -->
        <section name="cov" title="Coverage calculation options" expanded="false">
            <!-- Multi-select: the command template passes every selected value to coverm.
                 Two options are declared here; further ones are expanded from the
                 cov_method_options macro. -->
            <param argument="--methods" type="select" multiple="true" label="Method(s) for calculating coverage">
                <option value="relative_abundance" selected="true">relative_abundance: Percentage relative abundance of each genome, and the unmapped read percentage</option>
                <option value="mean">mean: Average number of aligned reads overlapping each position on the contig</option>
                <expand macro="cov_method_options"/>
            </param>
            <expand macro="coverage_params"/>
        </section>
        <!-- Dereplication settings plus optional genome quality inputs and filters. -->
        <section name="derep" title="Dereplication / Genome clustering" expanded="false">
            <!-- The select value is the literal command-line flag (or an empty string),
                 which the command template emits unchanged.
                 NOTE(review): no option is marked selected="true"; confirm that the
                 resulting default is the intended one. -->
            <conditional name="dereplicate">
                <param argument="--dereplicate" type="select" label="Do genome dereplication via average nucleotide identity (ANI)?" help="When this is run,
                    dereplication occurs transparently through the Galah method.">
                    <option value="--dereplicate">Yes</option>
                    <option value="" selected="false">No</option>
                </param>
                <!-- Tuning parameters, only available when dereplication is enabled. -->
                <when value="--dereplicate">
                    <param argument="--dereplication-ani" type="float" min="0" max="100" value="99" label="Overall ANI level to dereplicate at with FastANI" />
                    <param argument="--dereplication-aligned-fraction" type="float" min="0" value="50" label="Minimum aligned fraction of two genomes for clustering" />
                    <param argument="--dereplication-fragment-length" type="integer" min="0" value="3000" label="Length of fragment used in FastANI calculation (i.e. --fragLen)" />
                    <param argument="--dereplication-quality-formula" type="select" label="Scoring function for genome quality">
                        <option value="Parks2020_reduced" selected="true">Parks2020_reduced: A quality formula described in Parks et. al. 2020</option>
                        <option value="completeness-4contamination">completeness-4contamination</option>
                        <option value="completeness-5contamination">completeness-5contamination</option>
                        <option value="dRep">dRep</option>
                    </param>
                    <param argument="--dereplication-prethreshold-ani" type="float" min="0" max="100" value="95" label="Dereplication preclustering threshold"
                        help="Require at least this dashing-derived ANI for preclustering and to avoid FastANI on distant lineages within preclusters." />
                    <param argument="--dereplication-precluster-method" type="select" label="Method of calculating rough ANI for dereplication">
                        <option value="dashing" selected="true">dashing: HyperLogLog</option>
                        <option value="finch">finch: finch MinHash</option>
                    </param>
                </when>
                <when value=""/>
            </conditional>
            <!-- Optional inputs: each one is only passed to coverm when it is set. -->
            <param argument="--checkm-tab-table" type="data" format="tabular" optional="true" label="checkM table for defining genome quality"
                help="It is used both for filtering and to rank genomes during clustering"/>
            <param argument="--genome-info" type="data" format="csv" optional="true"
                label="dRep style genome info table for defining quality" />
            <param argument="--min-completeness" type="float" optional="true" min="0" max="100"
                label="Minimum completeness" help="Genomes with lower completeness percentage will be ignored" />
            <param argument="--max-contamination" type="float" optional="true" min="0" max="100"
                label="Maximum contamination" help="Genomes with higher contamination percentage will be ignored" />
        </section>
        <!-- Output formatting and optional dereplication outputs. The two
             dereplication outputs are only created when dereplication is enabled
             (see the filters on the corresponding outputs). -->
        <section name="out" title="Outputs" expanded="false">
            <expand macro="output_format"/>
            <param argument="--no-zeros" type="boolean" truevalue="--no-zeros" falsevalue="" label="Omit printing of genomes that have zero coverage?" />
            <param argument="--dereplication-output-cluster-definition" type="boolean" truevalue="--dereplication-output-cluster-definition" falsevalue="" label="Output a tabular file with dereplicated representatives and member lines?" help="Only produced when genome dereplication is enabled." />
            <param argument="--dereplication-output-representative-fasta-directory-copy" type="boolean" truevalue="--dereplication-output-representative-fasta-directory-copy" falsevalue="" label="Output dereplicated representative genomes?" help="Only produced when genome dereplication is enabled." />
        </section>
    </inputs>
    <outputs>
        <!-- Main coverage table; always produced. -->
        <data name="output" format="tabular" label="${tool.name} on ${on_string}"/>
        <!-- Cluster definition table; only kept when dereplication is enabled and the output was requested. -->
        <data name="cluster_definition" format="tabular" label="${tool.name} on ${on_string}: Cluster definition">
            <filter>derep['dereplicate']['dereplicate'] and out['dereplication_output_cluster_definition']</filter>
        </data>
        <!-- Representative genomes, discovered as *.fna files in the representative-fasta directory;
             only kept when dereplication is enabled and the output was requested. -->
        <collection name="representative_fasta" type="list" label="${tool.name} on ${on_string}: Dereplicated representative genomes">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fna" format="fasta" directory="representative-fasta" />
            <filter>derep['dereplicate']['dereplicate'] and out['dereplication_output_representative_fasta_directory_copy']</filter>
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- Scenario: unmapped reads in co-assembly mode. A paired collection plus one
                 additional single-read dataset are used, and genomes are defined from the
                 contigs of the reference, split on the "~" separator. -->
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="paired_collection"/>
                        <param name="paired_reads">
                            <collection type="list:paired">
                                <element name="reads_for_seq1_and_seq2">
                                    <collection type="paired">
                                        <element name="forward" value="reads_for_seq1_and_seq2.1.fq.gz" ftype="fastqsanger.gz"/>
                                        <element name="reverse" value="reads_for_seq1_and_seq2.2.fq.gz" ftype="fastqsanger.gz"/>
                                    </collection>
                                </element>
                            </collection>
                        </param>
                    </conditional>
                    <repeat name="extra_reads">
                        <conditional name="read_type">
                            <param name="type" value="single"/>
                            <param name="single" value="reads_for_seq1_and_seq2.fna"/>
                        </conditional>
                    </repeat>
                    <conditional name="genome">
                        <param name="ref_or_genome" value="contigs"/>
                        <param name="reference" value="7seqs.fna" />
                        <conditional name="cond_single_genome">
                            <param name="single_genome" value=""/>
                            <conditional name="genome_contig_definition">
                                <param name="choice" value="separator"/>
                                <param name="separator" value="~"/>
                            </conditional>
                        </conditional>
                    </conditional>
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <param name="exclude_genomes_from_deshard" value="false"/>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean,relative_abundance,variance"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <section name="derep">
                <conditional name="dereplicate">
                    <param name="dereplicate" value=""/>
                </conditional>
            </section>
            <section name="out">
                <param name="output_format" value="sparse"/>
                <param name="no_zeros" value=""/>
                <param name="dereplication_output_cluster_definition" value="" />
                <param name="dereplication_output_representative_fasta_directory_copy"  value="" />
            </section>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Sample"/>
                    <has_text text="Genome"/>
                    <has_text text="Relative Abundance (%)"/>
                    <has_text text="Mean"/>
                    <has_text text="Variance"/>
                    <has_text text="7seqs.fna_0/reads_for_seq1_and_seq2_paired_collection_0"/>
                    <has_text text="unmapped"/>
                    <has_text text="genome6"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Scenario: already mapped input (one BAM file); genomes are defined from
                 the contigs in the BAM file, split on the "~" separator. -->
            <conditional name="mapped">
                <param name="mapped" value="mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <param name="bam_files" value="7seqs.reads_for_seq1_and_seq2.bam"/>
                    <conditional name="genome">
                        <param name="ref_or_genome" value="contigs"/>
                        <conditional name="cond_single_genome">
                            <param name="single_genome" value=""/>
                            <conditional name="genome_contig_definition">
                                <param name="choice" value="separator"/>
                                <param name="separator" value="~"/>
                            </conditional>
                        </conditional>
                    </conditional>
                    <!-- "sharded" is set once, after the genome definition, as in the other tests. -->
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <param name="exclude_genomes_from_deshard" value="false"/>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean,relative_abundance"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="10"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <section name="derep">
                <conditional name="dereplicate">
                    <param name="dereplicate" value=""/>
                </conditional>
            </section>
            <section name="out">
                <param name="output_format" value="sparse"/>
                <param name="no_zeros" value=""/>
                <param name="dereplication_output_cluster_definition" value="" />
                <param name="dereplication_output_representative_fasta_directory_copy"  value="" />
            </section>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Sample"/>
                    <has_text text="Genome"/>
                    <has_text text="Mean"/>
                    <has_text text="7seqs.reads_for_seq1_and_seq2"/>
                    <has_text text="genome1"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- Scenario: unmapped single reads in co-assembly mode; genomes are given as
                 FASTA files selected from the "builtin" source. -->
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="single"/>
                        <param name="single" value="1read.actually_fasta.fq"/>
                    </conditional>
                    <conditional name="genome">
                        <param name="ref_or_genome" value="genomic"/>
                        <conditional name="genomic">
                            <param name="source" value="builtin"/>
                            <param name="genome_fasta_files" value="500kb_name,1mbp_name"/>
                        </conditional>
                    </conditional>
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <param name="exclude_genomes_from_deshard" value="false"/>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="covered_bases"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="0"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <section name="derep">
                <conditional name="dereplicate">
                    <param name="dereplicate" value="--dereplicate"/>
                    <param name="dereplication_ani" value="99" />
                    <param name="dereplication_aligned_fraction" value="50" />
                    <param name="dereplication_fragment_length" value="3000" />
                    <param name="dereplication_quality_formula" value="Parks2020_reduced" />
                    <param name="dereplication_prethreshold_ani" value="95" />
                    <param name="dereplication_precluster_method" value="finch"/>
                </conditional>
                <param name="genome_info" value="genomeInfo.csv"/>
            </section>
            <section name="out">
                <param name="output_format" value="sparse"/>
                <param name="no_zeros" value=""/>
                <param name="dereplication_output_cluster_definition" value="" />
                <param name="dereplication_output_representative_fasta_directory_copy"  value="" />
            </section>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Sample"/>
                    <has_text text="Genome"/>
                    <has_text text="Covered Bases"/>
                    <has_text text="1read.actually_fasta.fq_single_0"/>
                    <has_text text="1mbp"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="3">
            <!-- Scenario: unmapped paired reads (FASTA) in co-assembly mode; genomes are
                 given as three FASTA files from the history. -->
            <conditional name="mapped">
                <param name="mapped" value="not-mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <conditional name="read_type">
                        <param name="type" value="paired_collection"/>
                        <param name="paired_reads">
                            <collection type="list:paired">
                                <element name="reads_for_genome2">
                                    <collection type="paired">
                                        <element name="forward" value="reads_for_genome2.1.fa" ftype="fasta"/>
                                        <element name="reverse" value="reads_for_genome2.2.fa" ftype="fasta"/>
                                    </collection>
                                </element>
                            </collection>
                        </param>
                    </conditional>
                    <conditional name="genome">
                        <param name="ref_or_genome" value="genomic"/>
                        <conditional name="genomic">
                            <param name="source" value="history"/>
                            <param name="genome_fasta_files" value="genome1.fna,genome2.fna,genome3.fna"/>
                        </conditional>
                    </conditional>
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <param name="exclude_genomes_from_deshard" value="false"/>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="mean"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="0"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <section name="derep">
                <conditional name="dereplicate">
                    <param name="dereplicate" value="--dereplicate"/>
                    <param name="dereplication_ani" value="99" />
                    <param name="dereplication_aligned_fraction" value="50" />
                    <param name="dereplication_fragment_length" value="3000" />
                    <param name="dereplication_quality_formula" value="Parks2020_reduced" />
                    <param name="dereplication_prethreshold_ani" value="95" />
                    <param name="dereplication_precluster_method" value="finch"/>
                </conditional>
            </section>
            <section name="out">
                <param name="output_format" value="sparse"/>
                <param name="no_zeros" value=""/>
                <param name="dereplication_output_cluster_definition" value="true" />
                <param name="dereplication_output_representative_fasta_directory_copy" value="true" />
            </section>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Genome"/>
                    <has_text text="Mean"/>
                    <has_text text="Sample"/>
                    <has_text text="reads_for_genome2_paired_collection_0"/>
                    <has_text text="genome2"/>
                    <has_text text="genome3"/>
                </assert_contents>
            </output>
            <output name="cluster_definition" ftype="tabular">
                <assert_contents>
                    <has_text text="genome1"/>
                    <has_text text="genome2"/>
                    <has_text text="genome3"/>
                </assert_contents>
            </output>
            <output_collection name="representative_fasta" type="list" count="3">
                <element name="genome1" ftype="fasta">
                    <assert_contents>
                        <has_text text=">random_sequence_length_500_1"/>
                        <has_text text=">random_sequence_length_500_2"/>
                    </assert_contents>
                </element>
                <element name="genome2" ftype="fasta">
                    <assert_contents>
                        <has_text text=">random_sequence_length_500_1"/>
                        <has_text text=">random_sequence_length_500_2"/>
                    </assert_contents>
                </element>
                <element name="genome3" ftype="fasta">
                    <assert_contents>
                        <has_text text=">random_sequence_length_500_1"/>
                        <has_text text=">random_sequence_length_500_2"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test expect_num_outputs="1">
            <!-- Scenario: already mapped input (one BAM file with supplementary alignments);
                 the single_genome option is switched on. -->
            <conditional name="mapped">
                <param name="mapped" value="mapped" />
                <conditional name="mode">
                    <param name="mode" value="co"/>
                    <param name="bam_files" value="2seqs.bad_read.1.with_supplementary.bam"/>
                    <conditional name="genome">
                        <param name="ref_or_genome" value="contigs"/>
                        <conditional name="cond_single_genome">
                            <param name="single_genome" value="true"/>
                            <conditional name="genome_contig_definition">
                                <param name="choice" value="default"/>
                            </conditional>
                        </conditional>
                    </conditional>
                    <!-- "sharded" is set once, after the genome definition, as in the other tests. -->
                    <param name="sharded" value="" />
                </conditional>
            </conditional>
            <param name="exclude_genomes_from_deshard" value="false"/>
            <section name="alignment">
                <param name="min_read_aligned_length" value="0" />
                <param name="min_read_percent_identity" value="0" />
                <param name="min_read_aligned_percent" value="0" />
                <conditional name="proper_pairs_only">
                    <param name="proper_pairs_only" value=""/>
                </conditional>
                <param name="exclude_supplementary" value=""/>
            </section>
            <section name="cov">
                <param name="methods" value="count"/>
                <param name="trim_min" value="5"/>
                <param name="trim_max" value="95"/>
                <param name="min_covered_fraction" value="0"/>
                <param name="contig_end_exclusion" value="75"/>
            </section>
            <section name="derep" >
                <conditional name="dereplicate">
                    <param name="dereplicate" value=""/>
                </conditional>
            </section>
            <section name="out">
                <param name="output_format" value="sparse"/>
                <param name="no_zeros" value=""/>
                <param name="dereplication_output_cluster_definition" value="" />
                <param name="dereplication_output_representative_fasta_directory_copy"  value="" />
            </section>
            <output name="output" ftype="tabular">
                <assert_contents>
                    <has_text text="Sample"/>
                    <has_text text="Genome"/>
                    <has_text text="Read Count"/>
                    <has_text text="2seqs.bad_read.1.with_supplementary"/>
                    <has_text text="genome1"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Dereplication quality formula**

Scoring function for genome quality:

- Parks2020_reduced: A quality formula described in `Parks et al. 2020 <https://doi.org/10.1038/s41587-020-0501-8>`_ (Supplementary Table 19) but only including those scoring criteria that can be calculated from the sequence without homology searching: *completeness-5*contamination-5*num_contigs/100-5*num_ambiguous_bases/100000*.
- completeness-4contamination: *completeness-4*contamination*
- completeness-5contamination: *completeness-5*contamination*
- dRep: *completeness-5*contamination+contamination*(strain_heterogeneity/100)+0.5*log10(N50)*

-----

.. class:: infomark

**Method for calculating coverage**

Calculation of genome-wise coverage (genome mode) is similar to calculating contig-wise (contig mode) coverage, except that the unit of reporting is per-genome rather than per-contig. For calculation methods which exclude base positions based on their coverage, all positions from all contigs are considered together.

- Relative abundance: Percentage relative abundance of each genome, and the unmapped read percentage
- Mean: Average number of aligned reads overlapping each position on the genome
- Trimmed mean: Average number of aligned reads overlapping each position after removing the most deeply and shallowly covered positions.
- Covered fraction: Fraction of genome covered by 1 or more reads
- Covered bases: Number of bases covered by 1 or more reads
- Variance: Variance of coverage depths
- Length: Length of each genome in base pairs
- Count: Number of reads aligned to each genome. Note that a single read may be aligned to multiple genomes with supplementary alignments
- Reads per base: Number of reads aligned divided by the length of the genome
- MetaBAT: Reproduction of the `MetaBAT <https://bitbucket.org/berkeleylab/metabat>`_ tool output
- Coverage histogram: Histogram of coverage depths
- RPKM: Reads mapped per kilobase of genome, per million mapped reads
- TPM: Transcripts Per Million as described in `Li et al 2010 <https://doi.org/10.1093/bioinformatics/btp692>`_

    ]]></help>
    <expand macro="citation"/>
</tool>
author	iuc
date	Sun, 25 Feb 2024 09:45:26 +0000
parents	847f274a60da
children