diff binning_refiner.xml @ 0:f350d182f786 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binning_refiner/ commit 591fef692e8efeb65c5214e6512aeaaf66201b26"
author iuc
date Fri, 18 Feb 2022 13:12:43 +0000
parents
children 1c8d0916f97f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/binning_refiner.xml	Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,125 @@
+<tool id="bin_refiner" name="Binning refiner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>refines metagenome bins</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+
+## Binning refiner prepends the -p param value
+## (which we set to be the string 'refined' in
+## the command line) to a hard-coded string (i.e.,
+## '_Binning_refiner_outputs') to set the base
+## output directory.
+
+## Binning_refiner requires 2 levels of directories
+## for the input bins.
+mkdir -p input_bin_dir/bins &&
+#for $f in $input_bins:
+    #set identifier = re.sub('[^\s\w\-]', '_', str($f.element_identifier))
+    #set file_name = $identifier + '.' + $f.ext
+    ## Binning_refiner doesn't handle gzipped files.
+    #if $f.ext.endswith(".gz")
+        gunzip -c '${f}' > 'input_bin_dir/bins/${file_name}' &&
+    #else:
+        ln -s '${f}' 'input_bin_dir/bins/${file_name}' &&
+    #end if
+#end for
+
+Binning_refiner
+-i input_bin_dir
+-p 'refined'
+&& mv 'refined_Binning_refiner_outputs/refined_contigs.txt' '$output_refined_contigs'
+&& mv 'refined_Binning_refiner_outputs/refined_sources_and_length.txt' '$output_sources_and_length'
+    ]]></command>
+    <inputs>
+        <param name="input_bins" format="fasta,fasta.gz" type="data" multiple="true" label="Binned fasta files"/>
+        <param argument="-m" type="integer" value="512" label="Minimum size (Kbp) of refined bin" help="Bins smaller than this will be eliminated"/>
+    </inputs>
+    <outputs>
+        <collection name="output_refined_bins" type="list" label="${tool.name} on ${on_string}: (refined bins)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fasta" format="fasta" directory="refined_Binning_refiner_outputs/refined_refined_bins"/>
+        </collection>
+        <data name="output_refined_contigs" format="tabular" label="${tool.name} on ${on_string} (refined contigs)"/>
+        <data name="output_sources_and_length" format="tabular" label="${tool.name} on ${on_string} (sources and length)"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="3">
+            <param name="input_bins" value="MetaBAT_17.fa.gz,MetaBAT_18.fa.gz,MetaBAT_19.fa.gz,MetaBAT_20.fa.gz,MetaBAT_21.fa.gz,MetaBAT_22.fa.gz,MetaBAT_23.fa.gz,Concoct_1.fa.gz,Concoct_3.fa.gz,Concoct_8.fa.gz" ftype="fasta.gz"/>
+            <output_collection name="output_refined_bins" type="list" count="7">
+                <element name="refined_1" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="1320640"/>
+                        <has_text text=">scaffold_2064"/>
+                        <has_n_lines n="21765"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_2" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="941488"/>
+                        <has_text text=">scaffold_1301"/>
+                        <has_n_lines n="15461"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_3" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="883642"/>
+                        <has_text text=">scaffold_262"/>
+                        <has_n_lines n="14495"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_4" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="758509"/>
+                        <has_text text=">scaffold_923"/>
+                        <has_n_lines n="12484"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_5" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="722197"/>
+                        <has_text text=">scaffold_232"/>
+                        <has_n_lines n="11849"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_6" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="637342"/>
+                        <has_text text=">scaffold_259"/>
+                        <has_n_lines n="10460"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_7" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="560996"/>
+                        <has_text text=">scaffold_1510"/>
+                        <has_n_lines n="9219"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="output_refined_contigs" file="output_refined_contigs.tabular" ftype="tabular"/>
+            <output name="output_sources_and_length" file="output_sources_and_length.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Reconciles the outputs of different binning programs with the aim to improve the quality of genome bins,
+especially with respect to contamination levels.
+
+The tool accepts one or more fasta datasets (i.e., bins) that were produced by metagenome binning tools
+(CONCOCT MaxBin2, MetaBAT2 and others).
+
+All refined bins larger than the specified "Minimum size (Kbp) of refined bin" will be output as a dataset
+collection of fasta files.  Additional outputs include a tabular dataset containing the id of the contigs
+in each refined bin (refined contigs) and another tabular dataset containing the size of each refined bin
+and the origin of its contigs (sources and length).
+
+**More information**
+
+https://github.com/songweizhi/Binning_refiner
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>