Mercurial > repos > galaxy-australia > metawrapmg_binning
changeset 4:cbd58ca70eac draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/metawrapmg commit dc852a2f4ea6662882c1d55e6a18431d1ab4ecc7
line wrap: on
line diff
--- a/macros.xml Tue Mar 11 12:23:05 2025 +0000 +++ b/macros.xml Sun Nov 02 21:02:14 2025 +0000 @@ -1,7 +1,7 @@ <macros> <token name="@TOOL_VERSION@">1.3.0</token> - <token name="@VERSION_SUFFIX@">2</token> - <token name="@PROFILE@">22.05</token> + <token name="@VERSION_SUFFIX@">3</token> + <token name="@PROFILE@">24.0</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">metawrap-mg</requirement>
--- a/metawrapmg_binning.xml Tue Mar 11 12:23:05 2025 +0000 +++ b/metawrapmg_binning.xml Sun Nov 02 21:02:14 2025 +0000 @@ -1,10 +1,14 @@ <tool id="metawrapmg_binning" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT"> - <description>metagenome binning pipeline</description> + <description>metagenome binning</description> <macros> <import>macros.xml</import> </macros> <expand macro="xrefs"/> - <expand macro="requirements"/> + + <requirements> + <requirement type="package" version="@TOOL_VERSION@">metawrap-binning</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ ## set memory usage if [ -n "\${GALAXY_MEMORY_MB}" ] ; then @@ -33,66 +37,20 @@ -t "\${GALAXY_SLOTS:-4}" reads_1.fastq reads_2.fastq - && - - ## Check which binning programs produced bins - bin_dirs=(INITIAL_BINNING/concoct_bins INITIAL_BINNING/maxbin2_bins INITIAL_BINNING/metabat2_bins) && - switches=('-A' '-B' '-C') && - - i=0 && - bin_string="" && - - for dir in "\${bin_dirs[@]}" ; do - if [ "\$(find "\$dir" -mindepth 1 -maxdepth 1 -exec echo found \;)" ]; then - bin_string+=" \${switches[\$i]} \$dir" ; - ((i++)) ; - fi - done && - - #################### - ## BIN REFINEMENT ## - #################### - - ## The checkm database is in the conda package, see - ## https://github.com/bioconda/bioconda-recipes/pull/38299. 
- - metawrap bin_refinement - -t "\${GALAXY_SLOTS:-4}" - -m "\${GALAXY_MEMORY_GB:-16}" - '$hidden_quick' - -c '${binning.c}' - -x '${binning.x}' - -o BIN_REFINEMENT - ## Only run bin_refinement on bins with contigs - "\${bin_string}" ]]></command> <inputs> <param name="metagenome" format="fasta" type="data" label="Metagenome" help="Metagenome co-assembly for binning"/> <param name="input" type="data_collection" collection_type="paired" label="Reads" help="Original reads that were used for the assembly"/> - <section name="binning" title="Binning parameters" expanded="false"> - <param argument="-c" type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins"/> - <param argument="-x" type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/> - </section> - <!-- the pplacer component requires 40 GB per thread. Skip pplacer for - testing by setting this to "quick" --> - <param name="hidden_quick" type="hidden" value=""/> </inputs> <outputs> - <!-- contigs binned into fasta files --> - <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins"> - <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P<designation>.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true"/> - </collection> - <!-- summary figures --> - <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures"> - <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures"/> + <collection name="concoct_bins" type="list" label="${tool.name} on ${on_string}: concoct bins"> + <discover_datasets pattern="(?P<designation>.+)\.fa" format="fasta" directory="INITIAL_BINNING/concoct_bins" recurse="true" match_relative_path="true"/> </collection> - <!-- statistics on binning --> - <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files"> - 
<discover_datasets pattern="(?P<designation>.+)\.stats" format="tabular" directory="BIN_REFINEMENT"/> + <collection name="maxbin2_bins" type="list" label="${tool.name} on ${on_string}: maxbin2 bins"> + <discover_datasets pattern="(?P<designation>.+)\.fa" format="fasta" directory="INITIAL_BINNING/maxbin2_bins" recurse="true" match_relative_path="true"/> </collection> - <!-- which contig went into which bin --> - <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments"> - <discover_datasets pattern="(?P<designation>.+)\.contigs" format="tabular" directory="BIN_REFINEMENT"/> + <collection name="metabat_bins" type="list" label="${tool.name} on ${on_string}: metabat2 bins"> + <discover_datasets pattern="(?P<designation>.+)\.fa" format="fasta" directory="INITIAL_BINNING/metabat2_bins" recurse="true" match_relative_path="true"/> </collection> </outputs> <tests> @@ -105,24 +63,21 @@ <element name="reverse" value="mapped_reads.r2.fastq.gz"/> </collection> </param> - <section name="binning"> - <param name="c" value="60"/> - <param name="x" value="15"/> - </section> - <param name="hidden_quick" value="--quick"/> - <output_collection name="metawrap_bins" type="list"> + <output_collection name="concoct_bins" type="list" count="27"> <element name="bin.1" ftype="fasta"> <assert_contents> - <has_text text="NODE_2_length_"/> + <has_text text=">NODE_"/> </assert_contents> </element> </output_collection> - <output_collection name="metawrap_stats" type="list"> - <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/> + <output_collection name="maxbin2_bins" type="list" count="2"> + <element name="bin.1" ftype="fasta"> + <assert_contents> + <has_text text="NODE_"/> + </assert_contents> + </element> </output_collection> - <output_collection name="metawrap_contigs" type="list"> - <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/> - </output_collection> + <output_collection name="metabat_bins" 
type="list" count="0"/> </test> </tests> <help><![CDATA[ @@ -146,41 +101,15 @@ paired end reads from any number of samples are aligned to it. The alignments are sorted and compressed with samtools, and library insert size statistics are also gathered at the same time (insert size average -and standard deviation). metaBAT2’s jgi_summarize_bam_contig_depths +and standard deviation). metaBAT2's jgi_summarize_bam_contig_depths function is used to generate contig adundance table, and it is then converted into the correct format for each of the three binners to take as input. After MaxBin2, metaBAT2, and CONCOCT finish binning the contigs with default settings, the final bins folders are created with -formatted bin fasta files. CheckM’s lineage_wf function is used to +formatted bin fasta files. CheckM's lineage_wf function is used to predict essential genes and estimate the completion and contamination of each bin. -MetaWRAP bin refinement -~~~~~~~~~~~~~~~~~~~~~~~ - -The metaWRAP::Bin_refinement module utilizes a hybrid approach to take -in two or three bin sets that were obtained with different software and -produces a consolidated, improved bin set. First, binning_refiner is -used to create hybridized bins from every possible combination of sets. -If there were three bin sets: A, B, and C, then the following hybrid -sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM -is then run to evaluate the completion and contamination of the bins in -each of the 7 bin sets (3 originals, 4 hybridized). The bins sets are -then iteratively compared to each other, and each pair is consolidated -into an improved bin set. To do this, the same bin is identified within -the two bin sets based on a minimum of 80% overlap in genome length, and -the better bin is determined based on which bin has the higher score. -The scoring function is S=Completion-5*Contamination. 
After all bin sets -are incorporated into the consolidated bin collection, a de-replication -function removes any duplicate contigs. If a contig is present in more -than one bin, it is removed from all but the best bin (based on scoring -function). CheckM is then run on the final bin set and a final report -file is generated showing the completion, contamination, and other -statistics generated by CheckM for each bin. Completion and -contamination rank plots are also generated to evaluate the success of -the Bin_refinement module, and compare its output to the quality of the -original bins. - -------------- MetaWRAP’s home page is
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/metawrapmg_refine.xml Sun Nov 02 21:02:14 2025 +0000
@@ -0,0 +1,178 @@
+<tool id="metawrapmg_bin_refinement" name="MetaWRAP" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+    <description>metagenome bin refinement</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">metawrap-refinement</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Convert the Galaxy memory allocation from MB to whole GB for the -m option below
+        if [ -n "\${GALAXY_MEMORY_MB}" ] ; then
+            export GALAXY_MEMORY_GB="\$((GALAXY_MEMORY_MB / 1024))" ;
+        fi ;
+
+        ## Stage every bin set in its own directory as bin.<index>.fa,
+        ## decompressing gzipped inputs on the way.
+        mkdir -p INITIAL_BINNING/A &&
+        #for i, a in enumerate($A)
+            #if $a.ext.endswith(".gz")
+                gunzip -c '$a' > INITIAL_BINNING/A/bin.${i}.fa &&
+            #else
+                cp '$a' INITIAL_BINNING/A/bin.${i}.fa &&
+            #end if
+        #end for
+
+        #if $B
+            mkdir -p INITIAL_BINNING/B &&
+            #for i, b in enumerate($B)
+                #if $b.ext.endswith(".gz")
+                    gunzip -c '$b' > INITIAL_BINNING/B/bin.${i}.fa &&
+                #else
+                    cp '$b' INITIAL_BINNING/B/bin.${i}.fa &&
+                #end if
+            #end for
+        #end if
+
+        #if $C
+            mkdir -p INITIAL_BINNING/C &&
+            #for i, c in enumerate($C)
+                #if $c.ext.endswith(".gz")
+                    gunzip -c '$c' > INITIAL_BINNING/C/bin.${i}.fa &&
+                #else
+                    cp '$c' INITIAL_BINNING/C/bin.${i}.fa &&
+                #end if
+            #end for
+        #end if
+
+        ####################
+        ## BIN REFINEMENT ##
+        ####################
+
+        ## The checkm database is in the conda package, see
+        ## https://github.com/bioconda/bioconda-recipes/pull/38299.
+
+        metawrap bin_refinement
+            -t "\${GALAXY_SLOTS:-4}"
+            -m "\${GALAXY_MEMORY_GB:-16}"
+            ## hidden_quick is empty by default (the test sets it to --quick);
+            ## skip it then so no empty '' argument is put on the command line
+            #if str($hidden_quick)
+                '$hidden_quick'
+            #end if
+            -c '${binning.c}'
+            -x '${binning.x}'
+            -o BIN_REFINEMENT
+            ## Set A is always staged; B and C are passed only when datasets were supplied
+            -A INITIAL_BINNING/A/
+            #if $B
+                -B INITIAL_BINNING/B/
+            #end if
+            #if $C
+                -C INITIAL_BINNING/C/
+            #end if
+    ]]></command>
+    <inputs>
+        <!-- Up to three bin sets, passed to metawrap bin_refinement as -A, -B and -C; only A is required -->
+        <param argument="-A" type="data" multiple="true" format="fasta,fasta.gz" label="Metagenomic bins (set A)"/>
+        <param argument="-B" type="data" multiple="true" optional="true" format="fasta,fasta.gz" label="Metagenomic bins (set B)"/>
+        <param argument="-C" type="data" multiple="true" optional="true" format="fasta,fasta.gz" label="Metagenomic bins (set C)"/>
+        <section name="binning" title="Binning parameters" expanded="false">
+            <param argument="-c" type="integer" value="70" min="50" max="100" label="Percent completion" help="Minimum % completion of bins"/>
+            <param argument="-x" type="integer" value="10" min="0" max="100" label="Percent contamination" help="Maximum % contamination of bins that is acceptable"/>
+        </section>
+        <!-- the pplacer component requires 40 GB per thread. Skip pplacer for
+             testing by setting this to "quick" -->
+        <param name="hidden_quick" type="hidden" value=""/>
+    </inputs>
+    <outputs>
+        <!-- contigs binned into fasta files -->
+        <collection name="metawrap_bins" type="list" label="MetaWRAP on ${on_string}: bins">
+            <discover_datasets pattern="metawrap_\d+_\d+_bins/(?P&lt;designation&gt;.+)\.fa" format="fasta" directory="BIN_REFINEMENT" recurse="true" match_relative_path="true"/>
+        </collection>
+        <!-- summary figures -->
+        <collection name="metawrap_figures" type="list" label="MetaWRAP on ${on_string}: summary figures">
+            <discover_datasets pattern="__designation_and_ext__" directory="BIN_REFINEMENT/figures"/>
+        </collection>
+        <!-- statistics on binning -->
+        <collection name="metawrap_stats" type="list" label="MetaWRAP on ${on_string}: stat files">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.stats" format="tabular" directory="BIN_REFINEMENT"/>
+        </collection>
+        <!-- which contig went into which bin -->
+        <collection name="metawrap_contigs" type="list" label="MetaWRAP on ${on_string}: contig assignments">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.contigs" format="tabular" directory="BIN_REFINEMENT"/>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- 01: basic function -->
+        <test>
+            <param name="A" ftype="fasta.gz" value="concoct_bins/bin.0.fa.gz,concoct_bins/bin.1.fa.gz,concoct_bins/bin.2.fa.gz,concoct_bins/bin.3.fa.gz,concoct_bins/bin.4.fa.gz,concoct_bins/bin.5.fa.gz,concoct_bins/bin.6.fa.gz,concoct_bins/bin.7.fa.gz,concoct_bins/bin.8.fa.gz,concoct_bins/bin.9.fa.gz,concoct_bins/bin.10.fa.gz,concoct_bins/bin.11.fa.gz,concoct_bins/bin.12.fa.gz,concoct_bins/bin.13.fa.gz,concoct_bins/bin.14.fa.gz,concoct_bins/bin.15.fa.gz,concoct_bins/bin.16.fa.gz,concoct_bins/bin.17.fa.gz,concoct_bins/bin.18.fa.gz,concoct_bins/bin.19.fa.gz,concoct_bins/bin.20.fa.gz,concoct_bins/bin.21.fa.gz,concoct_bins/bin.22.fa.gz,concoct_bins/bin.23.fa.gz,concoct_bins/bin.24.fa.gz,concoct_bins/bin.25.fa.gz,concoct_bins/bin.26.fa.gz"/>
+            <param name="B" ftype="fasta.gz" value="maxbin2_bins/bin.0.fa.gz,maxbin2_bins/bin.1.fa.gz"/>
+            <section name="binning">
+                <param name="c" value="60"/>
+                <param name="x" value="15"/>
+            </section>
+            <param name="hidden_quick" value="--quick"/>
+            <output_collection name="metawrap_bins" type="list">
+                <element name="bin.1" ftype="fasta">
+                    <assert_contents>
+                        <has_text text="NODE_2_length_"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="metawrap_stats" type="list">
+                <element name="metawrap_60_15_bins" file="test02.stats" ftype="tabular"/>
+            </output_collection>
+            <output_collection name="metawrap_contigs" type="list">
+                <element name="metawrap_60_15_bins" file="test02.contigs" ftype="tabular"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+MetaWRAP
+--------
+
+MetaWRAP aims to be an easy-to-use metagenomic wrapper suite that
+accomplishes the core tasks of metagenomic analysis. Additionally,
+metaWRAP takes bin extraction and analysis to the next level. metaWRAP
+is meant to be a fast and simple approach before you delve deeper into
+parameterization of your analysis. MetaWRAP can be applied to a variety
+of environments, including gut, water, and soil microbiomes (see
+metaWRAP paper for benchmarks).
+
+MetaWRAP bin refinement
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The metaWRAP::Bin_refinement module utilizes a hybrid approach to take
+in two or three bin sets that were obtained with different software and
+produces a consolidated, improved bin set. First, binning_refiner is
+used to create hybridized bins from every possible combination of sets.
+If there were three bin sets: A, B, and C, then the following hybrid
+sets will be produced with binning_refiner: AB, BC, AC, and ABC. CheckM
+is then run to evaluate the completion and contamination of the bins in
+each of the 7 bin sets (3 originals, 4 hybridized). The bin sets are
+then iteratively compared to each other, and each pair is consolidated
+into an improved bin set. To do this, the same bin is identified within
+the two bin sets based on a minimum of 80% overlap in genome length, and
+the better bin is determined based on which bin has the higher score.
+The scoring function is S=Completion-5*Contamination. After all bin sets
+are incorporated into the consolidated bin collection, a de-replication
+function removes any duplicate contigs. If a contig is present in more
+than one bin, it is removed from all but the best bin (based on scoring
+function). CheckM is then run on the final bin set and a final report
+file is generated showing the completion, contamination, and other
+statistics generated by CheckM for each bin. Completion and
+contamination rank plots are also generated to evaluate the success of
+the Bin_refinement module, and compare its output to the quality of the
+original bins.
+
+--------------
+
+MetaWRAP’s home page is
+`bxlab/metaWRAP <https://github.com/bxlab/metaWRAP>`__.
+
+This tool was wrapped by the Galaxy Australia team.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
