view amas_summary.xml @ 0:5c00398809f9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:28:20 +0000
parents
children
line wrap: on
line source

<tool id="amas_summary" name="AMAS summary" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>summarise multiple alignments</description>
    
    <!-- Shared definitions are imported from macros.xml (not shown here); the @TOKEN@
         placeholders and the macros expanded in this tool are presumably defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Cross-reference to the bio.tools registry entry "amas". -->
    <xrefs>
        <xref type="bio.tools">amas</xref>
    </xrefs>

    <!-- Macro expansions; their content is defined outside this file. -->
    <expand macro="requirements" />
    <expand macro="version_command" />

    <!--
        Command template (Cheetah rendered into a shell script).
        The @...@ tokens are expanded from macros defined outside this file; presumably
        they set the IN_FORMAT shell variable and stage the input files before AMAS runs
        (confirm against macros.xml).
        The backslash in "\${...}" keeps the dollar sign away from Cheetah so the variable
        is resolved by the shell at run time; the core count falls back to 1 when
        GALAXY_SLOTS is unset.
        $by_taxon renders as the boolean parameter's truevalue or as an empty string.
    -->
    <command detect_errors="exit_code"><![CDATA[
        #import re
        set -eu;

        @SNIFF_INPUT_FORMAT@

        @CHECK_INTERLEAVED@

        @SYMLINK_INPUTS@

        python -m amas.AMAS
        summary
        $by_taxon
        --in-files
            @INPUT_FILENAMES@
        --in-format "\${IN_FORMAT}"
        --data-type $data_type
        --cores "\${GALAXY_SLOTS:-1}"
        $check_align
    ]]></command>

    <inputs>
        <!-- One or more alignment files in any of the accepted formats. -->
        <param name="input_files"
               type="data"
               format="fasta,phylip,nex"
               multiple="true"
               label="Sequence(s) to summarise"
               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
        <!-- Optional switch; its truevalue is inserted verbatim into the command line. -->
        <param argument="--by-taxon"
               type="boolean"
               checked="false"
               truevalue="--by-taxon"
               falsevalue=""
               label="Also emit per-taxon summaries" />
        <!-- Remaining parameters come from macros defined outside this file. -->
        <expand macro="data_type" />
        <expand macro="check_align" />
    </inputs>

    <outputs>
        <!-- Overall alignment statistics, picked up from summary.txt in the job working directory. -->
        <data name="summary_out" from_work_dir="summary.txt" format="txt" label="${tool.name} on ${on_string}: Alignment summary" />

        <!-- Per-taxon summaries: one list element per discovered *-seq-summary.txt file.
             The filter creates this collection only when the by-taxon option is enabled,
             so runs without it do not leave an empty collection in the history. -->
        <collection name="taxon_summaries" type="list" label="${tool.name} on ${on_string}: Per-taxon summaries">
            <filter>by_taxon</filter>
            <discover_datasets pattern="(?P&lt;name&gt;.+-seq-summary)\.txt" format="txt" />
        </collection>
    </outputs>

    <tests>
        <!-- Single FASTA input with per-taxon summaries enabled: checks the overall summary
             file and the one expected element of the per-taxon collection. -->
        <test expect_num_outputs="2">
            <param name="input_files" value="inputs/fasta1.fas" />
            <param name="by_taxon" value="true" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output name="summary_out" file="outputs/expected_summary.txt" />
            <output_collection name="taxon_summaries" type="list">
                <element name="fasta1.fas-seq-summary" file="outputs/expected_taxa_summary.txt" ftype="txt" />
            </output_collection>
        </test>
    </tests>

    <help><![CDATA[
        **What it does**

        AMAS Summary calculates comprehensive statistics for sequence alignments, providing quality control metrics essential for phylogenomic analyses.

        **Inputs**

        - **Alignment files**: One or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
        - **Input format**: Determined automatically from the input files; there is no format option to set
        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
        - **Also emit per-taxon summaries**: Optionally create detailed statistics for each sequence

        **Outputs**

        1. **Summary table** - Overall statistics for each alignment including:
            - Number of taxa and alignment length
            - Total matrix cells and proportion of missing data
            - Variable sites and parsimony-informative sites
            - GC content (DNA) or amino acid composition (protein)

        2. **Per-taxon summaries** (optional): Individual statistics for each sequence showing taxon-specific missing data and character frequencies

        **Statistics explained**

        - **Variable sites**: Positions with more than one character state (measures sequence diversity)
        - **Parsimony-informative sites**: Positions useful for phylogenetic inference (at least 2 taxa share each of 2+ states)
        - **Missing data**: Proportion of gaps, N's (DNA), or X's (protein)
        - **Matrix completeness**: Percentage of positions with actual sequence data

        **Use cases**

        - **Quality control**: Identify alignments with excessive missing data
        - **Alignment comparison**: Compare statistics across multiple genes/loci
        - **Taxon filtering**: Find sequences with poor coverage
        - **Publication reporting**: Generate standardized alignment statistics for methods sections

        @AMAS_SHARED_HELP@
    ]]></help>

    <expand macro="citations" />
</tool>