changeset 0:f350d182f786 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binning_refiner/ commit 591fef692e8efeb65c5214e6512aeaaf66201b26"
author iuc
date Fri, 18 Feb 2022 13:12:43 +0000
parents
children 1c8d0916f97f
files binning_refiner.xml macros.xml test-data/Concoct_1.fa.gz test-data/Concoct_3.fa.gz test-data/Concoct_8.fa.gz test-data/MetaBAT_17.fa.gz test-data/MetaBAT_18.fa.gz test-data/MetaBAT_19.fa.gz test-data/MetaBAT_20.fa.gz test-data/MetaBAT_21.fa.gz test-data/MetaBAT_22.fa.gz test-data/MetaBAT_23.fa.gz test-data/output_refined_contigs.tabular test-data/output_sources_and_length.tabular
diffstat 14 files changed, 156 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/binning_refiner.xml	Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,125 @@
+<tool id="bin_refiner" name="Binning refiner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>refines metagenome bins</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+
+## Binning_refiner builds its base output directory name by
+## prepending the -p value (fixed to 'refined' below) to the
+## hard-coded suffix '_Binning_refiner_outputs'; the mv commands
+## and the discover_datasets directory rely on that name.
+
+## Binning_refiner requires 2 levels of directories for the input
+## bins, so every bin is staged under input_bin_dir/bins.
+mkdir -p input_bin_dir/bins &&
+#for $f in $input_bins:
+    #set identifier = re.sub('[^\s\w\-]', '_', str($f.element_identifier))
+    #set file_name = $identifier + '.' + $f.ext
+    ## Binning_refiner doesn't handle gzipped files.
+    #if $f.ext.endswith(".gz")
+        gunzip -c '${f}' > 'input_bin_dir/bins/${file_name}' &&
+    #else:
+        ln -s '${f}' 'input_bin_dir/bins/${file_name}' &&
+    #end if
+#end for
+
+Binning_refiner
+-i input_bin_dir
+-p 'refined'
+-m $m
+&& mv 'refined_Binning_refiner_outputs/refined_contigs.txt' '$output_refined_contigs'
+&& mv 'refined_Binning_refiner_outputs/refined_sources_and_length.txt' '$output_sources_and_length'
+    ]]></command>
+    <inputs> <!-- Tool parameters: the bins to refine and a minimum refined-bin size -->
+        <param name="input_bins" format="fasta,fasta.gz" type="data" multiple="true" label="Binned fasta files"/> <!-- one or more fasta datasets, plain or gzipped -->
+        <param argument="-m" type="integer" value="512" label="Minimum size (Kbp) of refined bin" help="Bins smaller than this will be eliminated"/> <!-- integer, defaults to 512 -->
+    </inputs>
+    <outputs> <!-- One list collection of refined bins plus two tabular summary datasets -->
+        <collection name="output_refined_bins" type="list" label="${tool.name} on ${on_string}: (refined bins)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fasta" format="fasta" directory="refined_Binning_refiner_outputs/refined_refined_bins"/> <!-- each *.fasta file in this directory becomes an element named after the file without its extension -->
+        </collection>
+        <data name="output_refined_contigs" format="tabular" label="${tool.name} on ${on_string} (refined contigs)"/> <!-- filled by moving refined_contigs.txt in the command block -->
+        <data name="output_sources_and_length" format="tabular" label="${tool.name} on ${on_string} (sources and length)"/> <!-- filled by moving refined_sources_and_length.txt in the command block -->
+    </outputs>
+    <tests> <!-- Single functional test: ten gzipped bins (seven MetaBAT, three Concoct) in, seven refined bins and two tabular files out -->
+        <test expect_num_outputs="3"> <!-- the collection counts as one output next to the two tabular datasets -->
+            <param name="input_bins" value="MetaBAT_17.fa.gz,MetaBAT_18.fa.gz,MetaBAT_19.fa.gz,MetaBAT_20.fa.gz,MetaBAT_21.fa.gz,MetaBAT_22.fa.gz,MetaBAT_23.fa.gz,Concoct_1.fa.gz,Concoct_3.fa.gz,Concoct_8.fa.gz" ftype="fasta.gz"/>
+            <output_collection name="output_refined_bins" type="list" count="7"> <!-- every element is checked by size, one contig header and line count -->
+                <element name="refined_1" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="1320640"/>
+                        <has_text text=">scaffold_2064"/>
+                        <has_n_lines n="21765"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_2" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="941488"/>
+                        <has_text text=">scaffold_1301"/>
+                        <has_n_lines n="15461"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_3" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="883642"/>
+                        <has_text text=">scaffold_262"/>
+                        <has_n_lines n="14495"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_4" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="758509"/>
+                        <has_text text=">scaffold_923"/>
+                        <has_n_lines n="12484"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_5" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="722197"/>
+                        <has_text text=">scaffold_232"/>
+                        <has_n_lines n="11849"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_6" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="637342"/>
+                        <has_text text=">scaffold_259"/>
+                        <has_n_lines n="10460"/>
+                    </assert_contents>
+                </element>
+                <element name="refined_7" ftype="fasta">
+                    <assert_contents>
+                        <has_size value="560996"/>
+                        <has_text text=">scaffold_1510"/>
+                        <has_n_lines n="9219"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="output_refined_contigs" file="output_refined_contigs.tabular" ftype="tabular"/> <!-- compared against the expected file in test-data -->
+            <output name="output_sources_and_length" file="output_sources_and_length.tabular" ftype="tabular"/> <!-- compared against the expected file in test-data -->
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Reconciles the outputs of different binning programs with the aim of improving the quality of genome bins,
+especially with respect to contamination levels.
+
+The tool accepts one or more fasta datasets (i.e., bins) that were produced by metagenome binning tools
+(CONCOCT, MaxBin2, MetaBAT2 and others).
+
+All refined bins larger than the specified "Minimum size (Kbp) of refined bin" will be output as a dataset
+collection of fasta files.  Additional outputs include a tabular dataset containing the id of the contigs
+in each refined bin (refined contigs) and another tabular dataset containing the size of each refined bin
+and the origin of its contigs (sources and length).
+
+**More information**
+
+https://github.com/songweizhi/Binning_refiner
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,15 @@
+<macros> <!-- Shared tokens and XML macros imported by binning_refiner.xml -->
+    <token name="@TOOL_VERSION@">1.4.3</token> <!-- version of the binning_refiner package required below; also the first part of the tool version -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- appended to the tool version after "+galaxy" -->
+    <token name="@PROFILE@">21.09</token> <!-- value of the tool's profile attribute -->
+    <xml name="requirements"> <!-- expanded in the tool file to declare the package dependency -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">binning_refiner</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations"> <!-- expanded in the tool file to attach the citation by DOI -->
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btx086</citation>
+        </citations>
+    </xml>
+</macros>
Binary file test-data/Concoct_1.fa.gz has changed
Binary file test-data/Concoct_3.fa.gz has changed
Binary file test-data/Concoct_8.fa.gz has changed
Binary file test-data/MetaBAT_17.fa.gz has changed
Binary file test-data/MetaBAT_18.fa.gz has changed
Binary file test-data/MetaBAT_19.fa.gz has changed
Binary file test-data/MetaBAT_20.fa.gz has changed
Binary file test-data/MetaBAT_21.fa.gz has changed
Binary file test-data/MetaBAT_22.fa.gz has changed
Binary file test-data/MetaBAT_23.fa.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_refined_contigs.tabular	Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,8 @@
+Refined_bin	Contigs
+refined_1	scaffold_1016,scaffold_1020,scaffold_1044,scaffold_1066,scaffold_1069,scaffold_1073,scaffold_1086,scaffold_1096,scaffold_1116,scaffold_1122,scaffold_1129,scaffold_1137,scaffold_1167,scaffold_1170,scaffold_1201,scaffold_1253,scaffold_1275,scaffold_1287,scaffold_1296,scaffold_1306,scaffold_1331,scaffold_1333,scaffold_1350,scaffold_1351,scaffold_1353,scaffold_1385,scaffold_1395,scaffold_1425,scaffold_1442,scaffold_1452,scaffold_1455,scaffold_1470,scaffold_1474,scaffold_1499,scaffold_1514,scaffold_1538,scaffold_1541,scaffold_1555,scaffold_1556,scaffold_1559,scaffold_1570,scaffold_1572,scaffold_1599,scaffold_1612,scaffold_1625,scaffold_1628,scaffold_1634,scaffold_1646,scaffold_1771,scaffold_1779,scaffold_1781,scaffold_1808,scaffold_1827,scaffold_1847,scaffold_1879,scaffold_1891,scaffold_1906,scaffold_1909,scaffold_1921,scaffold_1934,scaffold_1936,scaffold_1952,scaffold_1960,scaffold_1985,scaffold_1995,scaffold_2033,scaffold_2039,scaffold_2064,scaffold_2080,scaffold_2102,scaffold_2124,scaffold_2128,scaffold_2172,scaffold_2196,scaffold_563,scaffold_583,scaffold_602,scaffold_655,scaffold_734,scaffold_752,scaffold_838,scaffold_895,scaffold_928,scaffold_929,scaffold_933,scaffold_945,scaffold_965,scaffold_967,scaffold_973
+refined_2	scaffold_1064,scaffold_1162,scaffold_1200,scaffold_1236,scaffold_1248,scaffold_1301,scaffold_138,scaffold_150,scaffold_1825,scaffold_2013,scaffold_2040,scaffold_205,scaffold_2134,scaffold_214,scaffold_2164,scaffold_247,scaffold_377,scaffold_380,scaffold_419,scaffold_486,scaffold_488,scaffold_558
+refined_3	scaffold_15,scaffold_20,scaffold_222,scaffold_262,scaffold_660,scaffold_684,scaffold_77,scaffold_861
+refined_4	scaffold_1014,scaffold_1061,scaffold_1081,scaffold_1177,scaffold_1193,scaffold_1257,scaffold_1349,scaffold_1377,scaffold_1428,scaffold_1503,scaffold_1520,scaffold_1558,scaffold_1576,scaffold_1598,scaffold_1635,scaffold_1698,scaffold_1704,scaffold_1759,scaffold_1763,scaffold_1784,scaffold_1867,scaffold_1945,scaffold_2046,scaffold_2139,scaffold_2210,scaffold_437,scaffold_537,scaffold_568,scaffold_667,scaffold_680,scaffold_699,scaffold_700,scaffold_768,scaffold_804,scaffold_832,scaffold_834,scaffold_923,scaffold_939,scaffold_986
+refined_5	scaffold_1048,scaffold_1049,scaffold_1150,scaffold_126,scaffold_14,scaffold_232,scaffold_571,scaffold_66
+refined_6	scaffold_1667,scaffold_186,scaffold_259,scaffold_301,scaffold_32,scaffold_461,scaffold_466,scaffold_659,scaffold_708
+refined_7	scaffold_1140,scaffold_1172,scaffold_1303,scaffold_1510,scaffold_1614,scaffold_1649,scaffold_1814,scaffold_1852,scaffold_2104,scaffold_2153,scaffold_260,scaffold_273,scaffold_361,scaffold_369,scaffold_429,scaffold_589,scaffold_906,scaffold_987,scaffold_988
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_sources_and_length.tabular	Fri Feb 18 13:12:43 2022 +0000
@@ -0,0 +1,8 @@
+Refined_bin	Size(Kbp)	Source
+refined_1	1267.23	Concoct_3_fa_gz.fasta.gz
+refined_2	904.03	MetaBAT_17_fa_gz.fasta.gz
+refined_3	848.68	MetaBAT_18_fa_gz.fasta.gz
+refined_4	728.02	MetaBAT_20_fa_gz.fasta.gz
+refined_5	693.6	MetaBAT_21_fa_gz.fasta.gz
+refined_6	612.08	MetaBAT_22_fa_gz.fasta.gz
+refined_7	538.59	MetaBAT_23_fa_gz.fasta.gz