<!--
view generate_sequence_features.xml @ 2:ff4e487f4f7e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit 13abac83068b126399ec415141007a48c2efaa84
author iuc
date Fri, 10 Nov 2023 20:50:57 +0000
parents 0ae1a2636de5
children
line wrap: on
line source
-->

<tool id="semibin_generate_sequence_features" name="SemiBin: Generate sequence features" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description; it opens with a parenthesis, so it reads as a continuation of the tool name. -->
    <description>
        (kmer and abundance) as training data for semi-supervised deep learning model training
    </description>
    <!-- macros.xml is the only import, so the macros expanded and the tokens used in this file are expected to come from it. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Shared boilerplate pulled in by macro name; the actual content lives outside this file. -->
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <!--
        Command template (Cheetah) that assembles the SemiBin2 call.
        - The BAM_FILES and FASTA_FILES tokens come first; presumably they stage the inputs
          as *.bam and contigs.fasta in the working directory (TODO confirm in macros.xml).
        - Modes "single" and "co" run generate_sequence_features_single; any other mode runs
          generate_sequence_features_multi and additionally passes a separator.
        - NOTE(review): $separator is not defined in this file; it is presumably set by one of
          the tokens above. Verify before changing.
        - The thread count is taken from GALAXY_SLOTS and falls back to 1.
        - The MIN_LEN token contributes the minimum-length arguments (defined elsewhere).
        - The ml-threshold option is only appended when the parameter is not empty.
    -->
    <command detect_errors="exit_code"><![CDATA[
#import re
@BAM_FILES@
@FASTA_FILES@

SemiBin2
#if $mode.select == 'single' or $mode.select == 'co'
    generate_sequence_features_single
#else
    generate_sequence_features_multi
    --separator '$separator'
#end if
    --input-fasta 'contigs.fasta'
    --input-bam *.bam
    --output 'output'
    --threads \${GALAXY_SLOTS:-1}
    @MIN_LEN@
#if str($ml_threshold) != ''
    --ml-threshold $ml_threshold
#end if
    ]]></command>
    <inputs>
        <!-- Parameter groups shared through macros: mode with FASTA/BAM inputs, minimum length, ml-threshold. -->
        <expand macro="mode_fasta_bam"/>
        <expand macro="min_len"/>
        <expand macro="ml-threshold"/>

        <!-- Multi-select for by-products to emit next to the training data; the tests below also run with nothing selected. -->
        <param name="extra_output"
               type="select"
               multiple="true"
               label="Extra outputs"
               help="In addition to the training data">
            <option value="coverage">Coverage files</option>
            <option value="contigs">Contigs (if multiple sample)</option>
        </param>
    </inputs>
    <outputs>
        <!-- All outputs are declared by macros: single-sample data, multi-sample data, and (presumably driven by extra_output; confirm in macros.xml) the extra outputs. -->
        <expand macro="data_output_single"/>
        <expand macro="data_output_multi"/>
        <expand macro="generate_sequence_features_extra_outputs"/>
    </outputs>
    <tests>
        <!--
            Single-sample mode: one FASTA, one BAM, automatic minimum length, ml_threshold 4000,
            with "coverage" requested as extra output. Four outputs are expected: the data file,
            its split variant and two coverage files. The split coverage file is only checked for
            its line count (n=1 with delta=1, i.e. 0 to 2 lines).
        -->
        <test expect_num_outputs="4">
            <conditional name="mode">
                <param name="select" value="single"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_single.bam"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="automatic"/>
            </conditional>
            <param name="ml_threshold" value="4000"/>
            <param name="extra_output" value="coverage"/>
            <output name="single_data" ftype="csv">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_text text="g1k_0"/>
                    <has_text text="g4k_9"/>
                </assert_contents>
            </output>
            <output name="single_data_split" ftype="csv">
                <assert_contents>
                    <has_n_lines n="81"/>
                    <has_text text="g1k_0_1"/>
                    <has_text text="g3k_2_2"/>
                    <has_text text="g4k_7_2"/>
                </assert_contents>
            </output>
            <output name="single_cov" ftype="csv">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_text text="g1k_0"/>
                </assert_contents>
            </output>
            <output name="single_split_cov" ftype="csv">
                <assert_contents>
                    <has_n_lines n="1" delta="1"/>
                </assert_contents>
            </output>
        </test>
        <!--
            Co-assembly mode: the same FASTA as the single-sample test, mapped against five BAM
            files, with "coverage" requested. The data outputs carry the same assertions as in
            single mode, while the coverage outputs are collections of five elements; only the
            first ("0") and last ("4") elements are inspected.
        -->
        <test expect_num_outputs="4">
            <conditional name="mode">
                <param name="select" value="co"/>
                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="automatic"/>
            </conditional>
            <param name="ml_threshold" value="4000"/>
            <param name="extra_output" value="coverage"/>
            <output name="single_data" ftype="csv">
                <assert_contents>
                    <has_n_lines n="41"/>
                    <has_text text="g1k_0"/>
                    <has_text text="g4k_9"/>
                </assert_contents>
            </output>
            <output name="single_data_split" ftype="csv">
                <assert_contents>
                    <has_n_lines n="81"/>
                    <has_text text="g1k_0_1"/>
                    <has_text text="g3k_2_2"/>
                    <has_text text="g4k_7_2"/>
                </assert_contents>
            </output>
            <output_collection name="co_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="41"/>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
                <element name="4" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="41"/>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="co_split_cov" count="5">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="81"/>
                        <has_text text="g1k_0_1"/>
                    </assert_contents>
                </element>
                <element name="4" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="81"/>
                        <has_text text="g1k_0_1"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!--
            Multi-sample mode with one concatenated FASTA and ten BAM files, requesting both
            "coverage" and "contigs". Seven output collections of ten elements each are expected;
            a few representative elements are checked per collection.
            NOTE(review): the FASTA fixture has a .gz suffix but is declared with ftype "fasta";
            confirm this is intended.
        -->
        <test expect_num_outputs="7">
            <conditional name="mode">
                <param name="select" value="multi"/>
                <conditional name="multi_fasta">
                    <param name="select" value="concatenated"/>
                    <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
                </conditional>
                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="automatic"/>
            </conditional>
            <param name="ml_threshold" value="4000"/>
            <param name="extra_output" value="coverage,contigs"/>
            <output_collection name="multi_data" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="21"/>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_data_split" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="41"/>
                        <has_text text="g1k_0_1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov" count="10">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="201"/>
                        <has_text text="S1:g1k_5"/>
                    </assert_contents>
                </element>
                <element name="9" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="201"/>
                        <has_text text="S1:g1k_5"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_cov_sample" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="21"/>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov" count="10">
                <element name="1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="401"/>
                        <has_text text="S1:g1k_5_1"/>
                    </assert_contents>
                </element>
                <element name="9" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="401"/>
                        <has_text text="S1:g1k_5_1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_split_cov_sample" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="41"/>
                        <has_text text="g1k_5_1"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_contigs" count="10">
                <element name="S1" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
                <element name="S9" ftype="fasta">
                    <assert_contents>
                        <has_text text=">g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!--
            Multi-sample mode with ten separate FASTA files (S1 to S10) and ten BAM files.
            extra_output is left unset, so only the two data collections are expected; the
            "S1" element of each is checked.
        -->
        <test expect_num_outputs="2">
            <conditional name="mode">
                <param name="select" value="multi"/>
                <conditional name="multi_fasta">
                    <param name="select" value="multi"/>
                    <param name="input_fasta" ftype="fasta" value="S1.fasta,S2.fasta,S3.fasta,S4.fasta,S5.fasta,S6.fasta,S7.fasta,S8.fasta,S9.fasta,S10.fasta"/>
                </conditional>
                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
            </conditional>
            <conditional name="min_len">
                <param name="method" value="automatic"/>
            </conditional>
            <param name="ml_threshold" value="4000"/>
            <output_collection name="multi_data" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="21"/>
                        <has_text text="g1k_0"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="multi_data_split" count="10">
                <element name="S1" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="41"/>
                        <has_text text="g1k_0_1"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <!-- Help page: one tool-specific sentence; the header, input and output sections are filled in from tokens defined outside this file. -->
    <help><![CDATA[
@HELP_HEADER@

This tool generates sequence features (kmer and abundance) as training data for semi-supervised deep learning model training.

Inputs
======

@HELP_INPUT_FASTA@

Outputs
=======

@HELP_DATA@

    ]]></help>
    <!-- Citation entries are supplied by the shared "citations" macro. -->
    <expand macro="citations"/>
</tool>