view kofamscan.xml @ 4:2eb7932e91a3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kofamscan commit 431ba943dcd0a584a64f28924396f3508c84f6b9
author iuc
date Thu, 25 Apr 2024 20:57:51 +0000
parents c3c46f14c7a4
children 8ec7636fa875
line wrap: on
line source

<tool id="kofamscan" name="KofamScan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.01">
    <description>gene function annotation based on KEGG orthology and HMM</description>
    <macros>
        <!-- Upstream KofamScan release; also used as the version of the kofamscan requirement. -->
        <token name="@TOOL_VERSION@">1.3.0</token>
        <!-- Wrapper revision, appended to the tool version after "+galaxy". -->
        <token name="@VERSION_SUFFIX@">3</token>
        <!--
            Checkbox deciding whether sequences without an assigned KO are reported.
            Each expansion passes selected="true" or selected="false", which replaces
            @SELECTED@ and thereby sets the default state of the checkbox.
        -->
        <xml name="reportannotation" token_selected="">
            <param
                name="reportannotation"
                type="boolean"
                truevalue="--report-unannotated"
                falsevalue="--no-report-unannotated"
                checked="@SELECTED@"
                label="Include sequence name to outputs even if no KOs are assigned?"/>
        </xml>
    </macros>
    <!-- Cross-reference to the tool's entry in the bio.tools registry. -->
    <xrefs>
        <xref type="bio.tools">kofamscan</xref>
    </xrefs>
    <!-- Runtime packages: kofamscan (presumably the provider of exec_annotation) and zip, which bundles the alignment files. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">kofamscan</requirement>
        <requirement type="package" version="3.0">zip</requirement>
    </requirements>
    <!-- Reports the installed version: the second space-separated field of the version line printed by exec_annotation. -->
    <version_command><![CDATA[exec_annotation --version | cut -d " " -f 2]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
## Profiles taken from the history are staged into ./profile first;
## a cached database is used directly from its installed location.
#if $p_cond.p_sel != "cached"
    ## preprocessing
    mkdir ./profile &&
    mkdir ./temp_extract &&
    #if $p_cond.p_sel == 'compressed'
        ## unpack the archive and collect every HMM/HAL file regardless of its depth in the archive
        tar -xf '${p_cond.p}' -C temp_extract &&
        find ./temp_extract/ -name '*.hmm' -exec mv {} ./profile \; &&
        find ./temp_extract/ -name '*.hal' -exec mv {} ./profile \; &&
    #elif $p_cond.p_sel == 'hmm'
        ## input files require prefix 'K' and file extension '.hmm'
        #for $i, $current in enumerate($p_cond.p)
            ln -s '$current' 'profile/K${i}.hmm' &&
        #end for
    #end if
#end if

## run
exec_annotation

#if $p_cond.p_sel != "cached"
    -p 'profile'
    -k '$p_cond.k'
#else
    ## without a subset selection the path ends at the profile directory itself
    -p '$p_cond.kofam.fields.profile_dir/#if $p_cond.kofam_subset then $p_cond.kofam_subset else ""#'
    -k '$p_cond.kofam.fields.ko_list'
#end if
-o 'result.txt'
--cpu \${GALAXY_SLOTS:-4}
-E $E
-T $ap.T
-f '$ap.f_cond.f_sel'
$ap.f_cond.reportannotation
#if 'alignments' in $ap.out
    --create-alignment
#end if
'$query'

## postprocessing
#if 'alignments' in $ap.out
    ## The archiving step is grouped in a subshell so that its '|| echo' fallback only
    ## covers a missing or empty alignment directory. Without the grouping the fallback
    ## also ran when exec_annotation failed, which reset the exit code to 0 and hid the
    ## failure from the exit code based error detection.
    && ( test -d 'tmp/alignment' && zip -q -r 'tmp/alignments.zip' tmp/alignment/* || echo 'No alignment files.' )
#end if
    ]]></command>
    <inputs>
        <param name="query" type="data" format="fasta" label="Select query sequence file" help="Nucleotide sequences are not accepted."/>
        <!-- Source of the profiles: a cached database from the data tables or files from the history. -->
        <conditional name="p_cond">
            <param name="p_sel" type="select" label="Select profile database format">
                <option value="cached" selected="true">Cached</option>
                <option value="compressed">Compressed set of HMM and HAL file(s)</option>
                <option value="hmm">HMM file(s)</option>
            </param>
            <when value="cached">
                <param name="kofam" type="select" label="Select cached KOfam database">
                    <options from_data_table="kofam"/>
                </param>
                <!-- Subsets are restricted to entries whose column 2 matches the selected database. -->
                <param name="kofam_subset" type="select" optional="true" label="Select profile subset" help="If nothing is selected, the complete profile directory of the chosen database is used.">
                    <options from_data_table="kofam_subset">
                        <filter type="param_value" column="2" ref="kofam"/>
                        <filter type="sort_by" column="1"/>
                        <filter type="unique_value" column="1"/>
                        <column name="name" index="1"/>
                        <column name="value" index="0"/>
                    </options>
                    <validator type="no_options" message="No option available for this input"/>
                </param>
            </when>
            <when value="compressed">
                <param argument="-p" type="data" format="tar" label="Select a compressed file with HMM and HAL file(s)" help="Compressed archives are available from KofamKOALA web service (https://www.genome.jp/tools/kofamkoala/)."/>
                <param argument="-k" type="data" format="tabular" label="Select KO list file"/>
            </when>
            <when value="hmm">
                <param argument="-p" type="data" format="hmm3" multiple="true" label="Select profile HMM file(s)"/>
                <param argument="-k" type="data" format="tabular" label="Select KO list file"/>
            </when>
        </conditional>
        <param argument="-E" type="float" min="0.0" max="1.0" value="0.01" label="Set E-value threshold"/>
        <section name="ap" title="Advanced parameters" expanded="true">
            <param argument="-T" type="integer" value="1" label="Set threshold scale" help="The score thresholds will be multiplied by this value."/>
            <!-- The selected value is passed verbatim to -f; the branches only differ in the default of the report checkbox. -->
            <conditional name="f_cond">
                <param name="f_sel" type="select" label="Select output format">
                    <option value="detail">Details for each hit (including hits below threshold) (detail)</option>
                    <option value="detail-tsv" selected="true">Tab separated values for detail format (detail-tsv)</option>
                    <option value="mapper">KEGG Mapper compatible format (mapper)</option>
                    <option value="mapper-one-line">KEGG Mapper compatible format, but all hit KOs are listed in one line (mapper-one-line)</option>
                </param>
                <when value="detail">
                    <expand macro="reportannotation" selected="false"/>
                </when>
                <when value="detail-tsv">
                    <expand macro="reportannotation" selected="false"/>
                </when>
                <when value="mapper">
                    <expand macro="reportannotation" selected="true"/>
                </when>
                <when value="mapper-one-line">
                    <expand macro="reportannotation" selected="true"/>
                </when>
            </conditional>
            <!-- Drives both the output filters and the alignment creation/archiving in the command. -->
            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific run.">
                <option value="result" selected="true">KofamScan Result</option>
                <option value="alignments">HMMER alignments</option>
                <option value="output">HMMER run output</option>
                <option value="tabular">HMMER tabular summary</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Annotation result; its datatype becomes tabular when the detail-tsv format is selected. -->
        <data name="out_result"
              format="txt"
              from_work_dir="result.txt"
              label="${tool.name} on ${on_string}: Results">
            <filter>'result' in ap['out']</filter>
            <change_format>
                <when input="ap.f_cond.f_sel" value="detail-tsv" format="tabular"/>
            </change_format>
        </data>
        <!-- Zip archive written by the postprocessing step of the command. -->
        <data name="out_alignments"
              format="zip"
              from_work_dir="tmp/alignments.zip"
              label="${tool.name} on ${on_string}: HMMER alignments">
            <filter>'alignments' in ap['out']</filter>
        </data>
        <!-- The two datasets below are collected from the tmp directory of the run. -->
        <data name="out_output"
              format="txt"
              from_work_dir="tmp/output/output.txt"
              label="${tool.name} on ${on_string}: HMMER output">
            <filter>'output' in ap['out']</filter>
        </data>
        <data name="out_tabular"
              format="txt"
              from_work_dir="tmp/tabular/tabular.txt"
              label="${tool.name} on ${on_string}: HMMER tabular">
            <filter>'tabular' in ap['out']</filter>
        </data>
    </outputs>
    <tests>
        <!--
            The KO list parameter "k" is declared inside the conditional "p_cond" that owns it,
            and it is omitted from the cached-data tests because the "cached" branch has no such input.
        -->
        <!-- #1 default: compressed profile archive -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="compressed"/>
                <param name="p" value="profiles.tar.gz"/>
                <param name="k" value="ko"/>
            </conditional>
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_text_matching expression=".+sp\|P00329\|ADH1_MOUSE.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2 individual HMM files, non-default thresholds, all four outputs -->
        <test expect_num_outputs="4">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="hmm"/>
                <param name="p" value="profiles/K00001.hmm,profiles/K00002.hmm,profiles/K00003.hmm"/>
                <param name="k" value="ko"/>
            </conditional>
            <param name="E" value="0.02"/>
            <section name="ap">
                <param name="T" value="2"/>
                <conditional name="f_cond">
                    <param name="f_sel" value="detail-tsv"/>
                    <param name="reportannotation" value="true"/>
                </conditional>
                <param name="out" value="alignments,output,result,tabular"/>
            </section>
            <output name="out_alignments">
                <assert_contents>
                    <has_size value="4099"/>
                </assert_contents>
            </output>
            <output name="out_output">
                <assert_contents>
                    <has_n_lines n="224"/>
                    <has_line line="Internal pipeline statistics summary:"/>
                </assert_contents>
            </output>
            <output name="out_result" ftype="tabular">
                <assert_contents>
                    <has_n_lines n="9"/>
                    <has_text_matching expression=".+sp\|P19858\|LDHA_BOVIN"/>
                </assert_contents>
            </output>
            <output name="out_tabular">
                <assert_contents>
                    <has_n_lines n="48"/>
                    <has_line line="K1"/>
                </assert_contents>
            </output>
        </test>
        <!-- #3 individual HMM files, mapper format -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="hmm"/>
                <param name="p" value="profiles/K00001.hmm,profiles/K00002.hmm,profiles/K00003.hmm"/>
                <param name="k" value="ko"/>
            </conditional>
            <section name="ap">
                <conditional name="f_cond">
                    <param name="f_sel" value="mapper"/>
                </conditional>
            </section>
            <output name="out_result" ftype="txt">
                <assert_contents>
                    <has_n_lines n="7"/>
                    <has_line line="sp|P19858|LDHA_BOVIN"/>
                </assert_contents>
            </output>
        </test>
        <!-- #4 individual HMM files, mapper-one-line format -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="hmm"/>
                <param name="p" value="profiles/K00001.hmm,profiles/K00002.hmm,profiles/K00003.hmm"/>
                <param name="k" value="ko"/>
            </conditional>
            <section name="ap">
                <conditional name="f_cond">
                    <param name="f_sel" value="mapper-one-line"/>
                </conditional>
            </section>
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="7"/>
                    <has_line line="sp|P19858|LDHA_BOVIN"/>
                </assert_contents>
            </output>
        </test>
        <!-- #5 second compressed profile archive -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="compressed"/>
                <param name="p" value="profilesTyp2.tar.gz"/>
                <param name="k" value="ko"/>
            </conditional>
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="5"/>
                    <has_text_matching expression=".+sp\|P00329\|ADH1_MOUSE.+"/>
                </assert_contents>
            </output>
        </test>
        <!-- #6 test with cached data -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="cached"/>
                <param name="kofam" value="test_value"/>
            </conditional>
            <section name="ap">
                <conditional name="f_cond">
                    <param name="f_sel" value="mapper-one-line"/>
                </conditional>
            </section>
            <output name="out_result" ftype="txt">
                <assert_contents>
                    <has_n_lines n="7"/>
                    <has_line line="sp|P19858|LDHA_BOVIN"/>
                </assert_contents>
            </output>
        </test>
        <!-- #7 test with cached data + subset -->
        <test expect_num_outputs="1">
            <param name="query" value="query.fasta"/>
            <conditional name="p_cond">
                <param name="p_sel" value="cached"/>
                <param name="kofam" value="test_value"/>
                <param name="kofam_subset" value="SUBSET"/>
            </conditional>
            <section name="ap">
                <conditional name="f_cond">
                    <param name="f_sel" value="mapper-one-line"/>
                </conditional>
            </section>
            <output name="out_result" ftype="txt">
                <assert_contents>
                    <has_n_lines n="7"/>
                    <has_line line="sp|P19858|LDHA_BOVIN"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

KofamScan is a gene function annotation tool based on KEGG Orthology and hidden Markov models.

KofamScan assigns K numbers to the user's sequence data by HMMER/HMMSEARCH against KOfam (a customized HMM database of KEGG Orthologs (KOs)). K number assignments with scores above the predefined thresholds for individual KOs are more reliable than other proposed assignments. Such high score assignments are highlighted with asterisks '*' in the output. The K number assignments facilitate the interpretation of the annotation results by linking the user's sequence data to the KEGG pathways and EC numbers.

**Input**

- a query file in FASTA format with one or more amino acid sequences. Each sequence must have a unique name. A name of a sequence is a string between the header symbol (">") and the first blank character (whitespace, tab, line break, etc.). Do not put a whitespace right after ">".

    ::

        >sp|P00325|ADH1B_HUMAN Alcohol dehydrogenase 1B OS=Homo sapiens GN=ADH1B PE=1 SV=2
        MSTAGKVIKCKAAVLWEVKKPFSIEDVEVAPPKAYEVRIKMVAVGICRTDDHVVSGNLVT

- a KO list file of KOfam

    ::

        knum    threshold   score_type  profile_type    F-measure   nseq    nseq_used   alen    mlen    eff_nseq    re/pos  definition
        K00001  361.33  domain  trim    0.326825    1601    1149    1538    393 13.33   0.590   alcohol dehydrogenase [EC:1.1.1.1]

- KOfam profile files in HMM3 format or a compressed dataset containing HMM3 profiles and HAL filtering files available `here <ftp://ftp.genome.jp/pub/db/kofam/>`_.

**Output**

- KofamScan output
- HMMER results as alignments, run output and tabular summary

.. class:: infomark

**References**

More information is available on `GitHub <https://github.com/takaram/kofam_scan>`_ and the `KofamKOALA webserver <https://www.genome.jp/tools/kofamkoala/>`_.
    ]]></help>
    <!-- DOI of the publication to cite when this tool is used. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btz859</citation>
    </citations>
</tool>