Mercurial > repos > iuc > semibin_generate_sequence_features
diff generate_sequence_features.xml @ 0:07bf5268724f draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author | iuc |
---|---|
date | Fri, 14 Oct 2022 21:45:54 +0000 |
parents | |
children | 0ae1a2636de5 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/generate_sequence_features.xml Fri Oct 14 21:45:54 2022 +0000 @@ -0,0 +1,275 @@ +<tool id="semibin_generate_sequence_features" name="SemiBin: Generate sequence features" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description> + (kmer and abundance) as training data for semi-supervised deep learning model training + </description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <expand macro="version"/> + <command detect_errors="exit_code"><![CDATA[ +#import re +@BAM_FILES@ +@FASTA_FILES@ + +SemiBin +#if $mode.select == 'single' or $mode.select == 'co' + generate_sequence_features_single +#else + generate_sequence_features_multi + --separator '$separator' +#end if + --input-fasta 'contigs.fasta' + --input-bam *.bam + --output 'output' + --threads \${GALAXY_SLOTS:-1} + @MIN_LEN@ +#if str($ml_threshold) != '' + --ml-threshold $ml_threshold +#end if + ]]></command> + <inputs> + <expand macro="mode_fasta_bam"/> + <expand macro="min_len"/> + <expand macro="ml-threshold"/> + <param name="extra_output" type="select" multiple="true" label="Extra outputs" help="In addition to the training data"> + <option value="coverage">Coverage files</option> + <option value="contigs">Contigs (if multiple sample)</option> + </param> + </inputs> + <outputs> + <expand macro="data_output_single"/> + <expand macro="data_output_multi"/> + <expand macro="generate_sequence_features_extra_outputs"/> + </outputs> + <tests> + <test expect_num_outputs="4"> + <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_single.bam"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="automatic"/> + </conditional> + <param name="ml_threshold" value="4000"/> + <param name="extra_output" value="coverage"/> + <output name="single_data" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0"/> + <has_text text="g4k_9"/> + </assert_contents> + </output> + <output name="single_data_split" ftype="csv"> + <assert_contents> + <has_n_lines n="81"/> + <has_text text="g1k_0_1"/> + <has_text text="g3k_2_2"/> + <has_text text="g4k_7_2"/> + </assert_contents> + </output> + <output name="single_cov" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0"/> + </assert_contents> + </output> + <output name="single_split_cov" ftype="csv"> + <assert_contents> + <has_n_lines n="1" delta="1"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="4"> + <conditional name="mode"> + <param name="select" value="co"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="automatic"/> + </conditional> + <param name="ml_threshold" value="4000"/> + <param name="extra_output" value="coverage"/> + <output name="single_data" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0"/> + <has_text text="g4k_9"/> + </assert_contents> + </output> + <output name="single_data_split" ftype="csv"> + <assert_contents> + <has_n_lines n="81"/> + <has_text text="g1k_0_1"/> + <has_text text="g3k_2_2"/> + <has_text text="g4k_7_2"/> + </assert_contents> + </output> + <output_collection name="co_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0"/> + </assert_contents> + </element> + <element name="4" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="co_split_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_n_lines n="81"/> + <has_text text="g1k_0_1"/> + </assert_contents> + </element> + <element name="4" ftype="csv"> + <assert_contents> + <has_n_lines n="81"/> + <has_text text="g1k_0_1"/> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="7"> + <conditional name="mode"> + <param name="select" value="multi"/> + <conditional name="multi_fasta"> + <param name="select" value="concatenated"/> + <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> + </conditional> + <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="automatic"/> + </conditional> + <param name="ml_threshold" value="4000"/> + <param name="extra_output" value="coverage,contigs"/> + <output_collection name="multi_data" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="21"/> + <has_text text="g1k_0"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_data_split" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0_1"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_cov" count="10"> + <element name="0" ftype="csv"> + <assert_contents> + <has_n_lines n="201"/> + <has_text text="S1:g1k_5"/> + </assert_contents> + </element> + <element name="9" ftype="csv"> + <assert_contents> + <has_n_lines n="201"/> + <has_text text="S1:g1k_5"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_cov_sample" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="21"/> + <has_text text="g1k_0"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_split_cov" count="10"> + <element name="1" ftype="csv"> + <assert_contents> + <has_n_lines n="401"/> + <has_text text="S1:g1k_5_1"/> + </assert_contents> + </element> + <element name="9" ftype="csv"> + <assert_contents> + <has_n_lines n="401"/> + <has_text text="S1:g1k_5_1"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_split_cov_sample" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_5_1"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_contigs" count="10"> + <element name="S1" ftype="fasta"> + <assert_contents> + <has_text text=">g1k_0"/> + </assert_contents> + </element> + <element name="S9" ftype="fasta"> + <assert_contents> + <has_text text=">g1k_0"/> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="2"> + <conditional name="mode"> + <param name="select" value="multi"/> + <conditional name="multi_fasta"> + <param name="select" value="multi"/> + <param name="input_fasta" ftype="fasta" value="S1.fasta,S2.fasta,S3.fasta,S4.fasta,S5.fasta,S6.fasta,S7.fasta,S8.fasta,S9.fasta,S10.fasta"/> + </conditional> + <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="automatic"/> + </conditional> + <param name="ml_threshold" value="4000"/> + <output_collection name="multi_data" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="21"/> + <has_text text="g1k_0"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_data_split" count="10"> + <element name="S1" ftype="csv"> + <assert_contents> + <has_n_lines n="41"/> + <has_text text="g1k_0_1"/> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <help><![CDATA[ +@HELP_HEADER@ + +This tool generates sequence features (kmer and abundance) as training data for semi-supervised deep learning model training. + +Inputs +====== + +@HELP_INPUT_FASTA@ + +Outputs +======= + +@HELP_DATA@ + + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file