diff generate_sequence_features.xml @ 0:07bf5268724f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:45:54 +0000
parents
children 0ae1a2636de5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/generate_sequence_features.xml	Fri Oct 14 21:45:54 2022 +0000
@@ -0,0 +1,275 @@
+<tool id="semibin_generate_sequence_features" name="SemiBin: Generate sequence features" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <!-- Short description; Galaxy displays it next to the tool name. -->
+    <description>(kmer and abundance) as training data for semi-supervised deep learning model training</description>
+    <!-- The @...@ tokens and every macro expanded in this file are presumably defined in macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+@BAM_FILES@
+@FASTA_FILES@
+## 'single' and 'co' modes share one SemiBin subcommand; every other mode uses the multi-sample subcommand
+SemiBin
+#if str($mode.select) in ('single', 'co')
+    generate_sequence_features_single
+#else
+    generate_sequence_features_multi
+    --separator '$separator'
+#end if
+    --input-fasta 'contigs.fasta'
+    --input-bam *.bam
+    --output 'output'
+    --threads \${GALAXY_SLOTS:-1}
+    @MIN_LEN@
+#if str($ml_threshold)
+    --ml-threshold $ml_threshold
+#end if
+    ]]></command>
+    <inputs> <!-- Mode/FASTA/BAM, min_len and ml_threshold parameters come from macros (presumably defined in macros.xml) -->
+        <expand macro="mode_fasta_bam"/>
+        <expand macro="min_len"/>
+        <expand macro="ml-threshold"/>
+        <param name="extra_output" type="select" multiple="true" label="Extra outputs" help="In addition to the training data"> <!-- Multi-select of additional outputs; not read by the command template in this file -->
+            <option value="coverage">Coverage files</option>
+            <option value="contigs">Contigs (if multiple sample)</option>
+        </param>
+    </inputs>
+    <outputs> <!-- All outputs are declared by macros; the tests in this file reference them by names such as single_data, co_cov and multi_data -->
+        <expand macro="data_output_single"/>
+        <expand macro="data_output_multi"/>
+        <expand macro="generate_sequence_features_extra_outputs"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="4"> <!-- Single-sample mode: one FASTA and one BAM, with the extra coverage outputs requested -->
+            <conditional name="mode">
+                <param name="select" value="single"/>
+                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
+                <param name="input_bam" ftype="bam" value="input_single.bam"/>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="automatic"/>
+            </conditional>
+            <param name="ml_threshold" value="4000"/>
+            <param name="extra_output" value="coverage"/>
+            <output name="single_data" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="41"/>
+                    <has_text text="g1k_0"/>
+                    <has_text text="g4k_9"/>
+                </assert_contents>
+            </output>
+            <output name="single_data_split" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="81"/>
+                    <has_text text="g1k_0_1"/>
+                    <has_text text="g3k_2_2"/>
+                    <has_text text="g4k_7_2"/>
+                </assert_contents>
+            </output>
+            <output name="single_cov" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="41"/>
+                    <has_text text="g1k_0"/>
+                </assert_contents>
+            </output>
+            <output name="single_split_cov" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="1" delta="1"/> <!-- n=1 with delta=1 accepts 0 to 2 lines -->
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="4"> <!-- Co-assembly mode: one FASTA plus five BAMs; the coverage outputs are 5-element collections -->
+            <conditional name="mode">
+                <param name="select" value="co"/>
+                <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
+                <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="automatic"/>
+            </conditional>
+            <param name="ml_threshold" value="4000"/>
+            <param name="extra_output" value="coverage"/>
+            <output name="single_data" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="41"/>
+                    <has_text text="g1k_0"/>
+                    <has_text text="g4k_9"/>
+                </assert_contents>
+            </output>
+            <output name="single_data_split" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="81"/>
+                    <has_text text="g1k_0_1"/>
+                    <has_text text="g3k_2_2"/>
+                    <has_text text="g4k_7_2"/>
+                </assert_contents>
+            </output>
+            <output_collection name="co_cov" count="5"> <!-- only the first ("0") and last ("4") elements are inspected -->
+                <element name="0" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="41"/>
+                        <has_text text="g1k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="4" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="41"/>
+                        <has_text text="g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="co_split_cov" count="5">
+                <element name="0" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="81"/>
+                        <has_text text="g1k_0_1"/>
+                    </assert_contents>
+                </element>
+                <element name="4" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="81"/>
+                        <has_text text="g1k_0_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="7"> <!-- Multi-sample mode: one concatenated FASTA (input_multi.fasta.gz), ten BAMs, both extra outputs requested -->
+            <conditional name="mode">
+                <param name="select" value="multi"/>
+                <conditional name="multi_fasta">
+                    <param name="select" value="concatenated"/>
+                    <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
+                </conditional>
+                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="automatic"/>
+            </conditional>
+            <param name="ml_threshold" value="4000"/>
+            <param name="extra_output" value="coverage,contigs"/>
+            <output_collection name="multi_data" count="10"> <!-- elements are addressed by sample name (S1) -->
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="21"/>
+                        <has_text text="g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_data_split" count="10">
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="41"/>
+                        <has_text text="g1k_0_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_cov" count="10"> <!-- elements are addressed by numeric index; contig ids carry the "S1:" sample prefix -->
+                <element name="0" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="201"/>
+                        <has_text text="S1:g1k_5"/>
+                    </assert_contents>
+                </element>
+                <element name="9" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="201"/>
+                        <has_text text="S1:g1k_5"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_cov_sample" count="10">
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="21"/>
+                        <has_text text="g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_split_cov" count="10">
+                <element name="1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="401"/>
+                        <has_text text="S1:g1k_5_1"/>
+                    </assert_contents>
+                </element>
+                <element name="9" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="401"/>
+                        <has_text text="S1:g1k_5_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_split_cov_sample" count="10">
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="41"/>
+                        <has_text text="g1k_5_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_contigs" count="10"> <!-- produced because extra_output includes "contigs" -->
+                <element name="S1" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+                <element name="S9" ftype="fasta">
+                    <assert_contents>
+                        <has_text text=">g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="2"> <!-- Multi-sample mode with ten per-sample FASTA files and no extra outputs: only the two data collections -->
+            <conditional name="mode">
+                <param name="select" value="multi"/>
+                <conditional name="multi_fasta">
+                    <param name="select" value="multi"/>
+                    <param name="input_fasta" ftype="fasta" value="S1.fasta,S2.fasta,S3.fasta,S4.fasta,S5.fasta,S6.fasta,S7.fasta,S8.fasta,S9.fasta,S10.fasta"/>
+                </conditional>
+                <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
+            </conditional>
+            <conditional name="min_len">
+                <param name="method" value="automatic"/>
+            </conditional>
+            <param name="ml_threshold" value="4000"/>
+            <output_collection name="multi_data" count="10"> <!-- same expectations for S1 as in the concatenated-FASTA test -->
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="21"/>
+                        <has_text text="g1k_0"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="multi_data_split" count="10">
+                <element name="S1" ftype="csv">
+                    <assert_contents>
+                        <has_n_lines n="41"/>
+                        <has_text text="g1k_0_1"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+This tool generates sequence features (k-mer and abundance) that serve as training data for the semi-supervised deep learning model.
+
+Inputs
+======
+
+@HELP_INPUT_FASTA@
+
+Outputs
+=======
+
+@HELP_DATA@
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file